Faster CVTT by reducing quality.

Make BC6 and BC7 CVTT faster while still having better quality than DXT5.
This commit is contained in:
K. S. Ernest (iFire) Lee 2022-01-18 04:39:55 -08:00
parent 992794e44a
commit 419b342a9a
44 changed files with 14756 additions and 7734 deletions

View file

@ -30,14 +30,17 @@
#include "image.h" #include "image.h"
#include "core/error/error_list.h"
#include "core/error/error_macros.h" #include "core/error/error_macros.h"
#include "core/io/image_loader.h" #include "core/io/image_loader.h"
#include "core/io/resource_loader.h" #include "core/io/resource_loader.h"
#include "core/math/math_funcs.h" #include "core/math/math_funcs.h"
#include "core/string/print_string.h" #include "core/string/print_string.h"
#include "core/templates/hash_map.h" #include "core/templates/hash_map.h"
#include "core/variant/dictionary.h"
#include <stdio.h> #include <stdio.h>
#include <cmath>
const char *Image::format_names[Image::FORMAT_MAX] = { const char *Image::format_names[Image::FORMAT_MAX] = {
"Lum8", //luminance "Lum8", //luminance
@ -3135,6 +3138,8 @@ void Image::_bind_methods() {
ClassDB::bind_method(D_METHOD("rgbe_to_srgb"), &Image::rgbe_to_srgb); ClassDB::bind_method(D_METHOD("rgbe_to_srgb"), &Image::rgbe_to_srgb);
ClassDB::bind_method(D_METHOD("bump_map_to_normal_map", "bump_scale"), &Image::bump_map_to_normal_map, DEFVAL(1.0)); ClassDB::bind_method(D_METHOD("bump_map_to_normal_map", "bump_scale"), &Image::bump_map_to_normal_map, DEFVAL(1.0));
ClassDB::bind_method(D_METHOD("compute_image_metrics", "compared_image", "use_luma"), &Image::compute_image_metrics);
ClassDB::bind_method(D_METHOD("blit_rect", "src", "src_rect", "dst"), &Image::blit_rect); ClassDB::bind_method(D_METHOD("blit_rect", "src", "src_rect", "dst"), &Image::blit_rect);
ClassDB::bind_method(D_METHOD("blit_rect_mask", "src", "mask", "src_rect", "dst"), &Image::blit_rect_mask); ClassDB::bind_method(D_METHOD("blit_rect_mask", "src", "mask", "src_rect", "dst"), &Image::blit_rect_mask);
ClassDB::bind_method(D_METHOD("blend_rect", "src", "src_rect", "dst"), &Image::blend_rect); ClassDB::bind_method(D_METHOD("blend_rect", "src", "src_rect", "dst"), &Image::blend_rect);
@ -3620,3 +3625,128 @@ Ref<Resource> Image::duplicate(bool p_subresources) const {
void Image::set_as_black() { void Image::set_as_black() {
memset(data.ptrw(), 0, data.size()); memset(data.ptrw(), 0, data.size());
} }
Dictionary Image::compute_image_metrics(const Ref<Image> p_compared_image, bool p_luma_metric) {
	// https://github.com/richgel999/bc7enc_rdo/blob/master/LICENSE
	//
	// This is free and unencumbered software released into the public domain.
	// Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
	// software, either in source code form or as a compiled binary, for any purpose,
	// commercial or non - commercial, and by any means.
	// In jurisdictions that recognize copyright laws, the author or authors of this
	// software dedicate any and all copyright interest in the software to the public
	// domain. We make this dedication for the benefit of the public at large and to
	// the detriment of our heirs and successors. We intend this dedication to be an
	// overt act of relinquishment in perpetuity of all present and future rights to
	// this software under copyright law.
	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE
	// AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
	// ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
	// WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

	// Computes 8-bit error statistics between this image and `p_compared_image`.
	//
	// The returned dictionary always holds the keys "max", "mean", "mean_squared",
	// "root_mean_squared" and "peak_snr". When `p_luma_metric` is true a single
	// REC709 luma difference is measured per pixel, otherwise the R, G, B and A
	// differences are measured separately. Only the area shared by both images
	// (minimum width and height) is compared.
	//
	// On any failure the pessimistic defaults below are returned unchanged.
	Dictionary result;
	result["max"] = INFINITY;
	result["mean"] = INFINITY;
	result["mean_squared"] = INFINITY;
	result["root_mean_squared"] = INFINITY;
	result["peak_snr"] = 0.0f;
	ERR_FAIL_NULL_V(p_compared_image, result);

	// Work on decompressed copies so that neither input image is modified.
	Error err = OK;
	Ref<Image> compared_image = duplicate(true);
	if (compared_image->is_compressed()) {
		err = compared_image->decompress();
	}
	ERR_FAIL_COND_V(err != OK, result);
	Ref<Image> source_image = p_compared_image->duplicate(true);
	if (source_image->is_compressed()) {
		err = source_image->decompress();
	}
	ERR_FAIL_COND_V(err != OK, result);

	// NOTE(review): this range starts at FORMAT_RH; confirm whether formats declared
	// before it should be rejected here as well. The per-pixel checks below still
	// refuse any channel value above 1.0.
	ERR_FAIL_COND_V_MSG((compared_image->get_format() >= Image::FORMAT_RH) && (compared_image->get_format() <= Image::FORMAT_RGBE9995), result, "Metrics on HDR images are not supported.");
	ERR_FAIL_COND_V_MSG((source_image->get_format() >= Image::FORMAT_RH) && (source_image->get_format() <= Image::FORMAT_RGBE9995), result, "Metrics on HDR images are not supported.");

	const bool average_component_error = true;

	const uint32_t width = MIN(compared_image->get_width(), source_image->get_width());
	const uint32_t height = MIN(compared_image->get_height(), source_image->get_height());
	// Without any shared pixel the averages below would evaluate 0 / 0.
	ERR_FAIL_COND_V_MSG(width == 0 || height == 0, result, "Can't compute metrics on an empty image.");

	// Number of histogram samples recorded for every pixel.
	const uint32_t values_per_pixel = p_luma_metric ? 1 : 4;

	// Histogram approach originally due to Charles Bloom.
	// hist[d] counts how many samples differ by exactly `d` (0..255).
	double hist[256];
	memset(hist, 0, sizeof(hist));

	for (uint32_t y = 0; y < height; y++) {
		for (uint32_t x = 0; x < width; x++) {
			const Color color_a = compared_image->get_pixel(x, y);
			const Color color_b = source_image->get_pixel(x, y);

			// Both metrics read R, G and B, so validate all three in either mode.
			ERR_FAIL_COND_V_MSG(color_a.r > 1.0f || color_a.g > 1.0f || color_a.b > 1.0f, result, "Can't compare HDR colors.");
			ERR_FAIL_COND_V_MSG(color_b.r > 1.0f || color_b.g > 1.0f || color_b.b > 1.0f, result, "Can't compare HDR colors.");

			if (!p_luma_metric) {
				ERR_FAIL_COND_V_MSG(color_a.a > 1.0f || color_b.a > 1.0f, result, "Can't compare HDR colors.");
				hist[Math::abs(color_a.get_r8() - color_b.get_r8())]++;
				hist[Math::abs(color_a.get_g8() - color_b.get_g8())]++;
				hist[Math::abs(color_a.get_b8() - color_b.get_b8())]++;
				hist[Math::abs(color_a.get_a8() - color_b.get_a8())]++;
			} else {
				// REC709 weightings
				int luma_a = (13938U * color_a.get_r8() + 46869U * color_a.get_g8() + 4729U * color_a.get_b8() + 32768U) >> 16U;
				int luma_b = (13938U * color_b.get_r8() + 46869U * color_b.get_g8() + 4729U * color_b.get_b8() + 32768U) >> 16U;
				hist[Math::abs(luma_a - luma_b)]++;
			}
		}
	}

	// Reduce the histogram to the largest difference, the sum of differences
	// and the sum of squared differences.
	double image_metric_max = 0.0;
	double sum = 0.0, sum2 = 0.0;
	for (uint32_t i = 0; i < 256; i++) {
		if (!hist[i]) {
			continue;
		}

		image_metric_max = MAX(image_metric_max, i);

		double x = i * hist[i];

		sum += x;
		sum2 += i * x;
	}

	// See http://richg42.blogspot.com/2016/09/how-to-compute-psnr-from-old-berkeley.html
	// Multiply as doubles so that large images cannot wrap the 32-bit product.
	double total_values = double(width) * double(height);

	if (average_component_error) {
		// Average over the samples actually recorded: one per pixel for luma,
		// four per pixel for RGBA.
		total_values *= values_per_pixel;
	}

	const double image_metric_mean = CLAMP(sum / total_values, 0.0f, 255.0f);
	const double image_metric_mean_squared = CLAMP(sum2 / total_values, 0.0f, 255.0f * 255.0f);
	const double image_metric_root_mean_squared = sqrt(image_metric_mean_squared);

	double image_metric_peak_snr = 0.0;
	if (!image_metric_root_mean_squared) {
		// Identical images: report a very large finite PSNR instead of infinity.
		image_metric_peak_snr = 1e+10f;
	} else {
		image_metric_peak_snr = CLAMP(log10(255.0f / image_metric_root_mean_squared) * 20.0f, 0.0f, 500.0f);
	}

	result["max"] = image_metric_max;
	result["mean"] = image_metric_mean;
	result["mean_squared"] = image_metric_mean_squared;
	result["root_mean_squared"] = image_metric_root_mean_squared;
	result["peak_snr"] = image_metric_peak_snr;
	return result;
}

View file

@ -399,6 +399,8 @@ public:
mipmaps = p_image->mipmaps; mipmaps = p_image->mipmaps;
data = p_image->data; data = p_image->data;
} }
Dictionary compute_image_metrics(const Ref<Image> p_compared_image, bool p_luma_metric = true);
}; };
VARIANT_ENUM_CAST(Image::Format) VARIANT_ENUM_CAST(Image::Format)

View file

@ -88,6 +88,15 @@
<description> <description>
</description> </description>
</method> </method>
<method name="compute_image_metrics">
<return type="Dictionary" />
<argument index="0" name="compared_image" type="Image" />
<argument index="1" name="use_luma" type="bool" />
<description>
Compute image metrics on the current image and the compared image.
The dictionary contains [code]max[/code], [code]mean[/code], [code]mean_squared[/code], [code]root_mean_squared[/code] and [code]peak_snr[/code].
</description>
</method>
<method name="convert"> <method name="convert">
<return type="void" /> <return type="void" />
<argument index="0" name="format" type="int" enum="Image.Format" /> <argument index="0" name="format" type="int" enum="Image.Format" />

View file

@ -32,8 +32,10 @@
#include "resource_importer_texture.h" #include "resource_importer_texture.h"
#include "core/error/error_macros.h"
#include "core/io/config_file.h" #include "core/io/config_file.h"
#include "core/io/image_loader.h" #include "core/io/image_loader.h"
#include "core/object/ref_counted.h"
#include "editor/editor_file_system.h" #include "editor/editor_file_system.h"
#include "editor/editor_node.h" #include "editor/editor_node.h"
#include "resource_importer_texture.h" #include "resource_importer_texture.h"
@ -263,12 +265,12 @@ void ResourceImporterLayeredTexture::_save_tex(Vector<Ref<Image>> p_images, cons
f->store_8('L'); f->store_8('L');
f->store_32(StreamTextureLayered::FORMAT_VERSION); f->store_32(StreamTextureLayered::FORMAT_VERSION);
f->store_32(p_images.size()); //2d layers or 3d depth f->store_32(p_images.size()); // For 2d layers or 3d depth.
f->store_32(mode); f->store_32(mode);
f->store_32(0); f->store_32(0);
f->store_32(0); f->store_32(0);
f->store_32(mipmap_images.size()); // amount of mipmaps f->store_32(mipmap_images.size()); // Adjust the amount of mipmaps.
f->store_32(0); f->store_32(0);
f->store_32(0); f->store_32(0);
@ -289,7 +291,6 @@ Error ResourceImporterLayeredTexture::import(const String &p_source_file, const
int hdr_compression = p_options["compress/hdr_compression"]; int hdr_compression = p_options["compress/hdr_compression"];
int bptc_ldr = p_options["compress/bptc_ldr"]; int bptc_ldr = p_options["compress/bptc_ldr"];
bool mipmaps = p_options["mipmaps/generate"]; bool mipmaps = p_options["mipmaps/generate"];
//bool mipmap_limit = p_options["mipmaps/limit"];
int channel_pack = p_options["compress/channel_pack"]; int channel_pack = p_options["compress/channel_pack"];
int hslices = (p_options.has("slices/horizontal")) ? int(p_options["slices/horizontal"]) : 0; int hslices = (p_options.has("slices/horizontal")) ? int(p_options["slices/horizontal"]) : 0;
@ -377,87 +378,23 @@ Error ResourceImporterLayeredTexture::import(const String &p_source_file, const
slices.push_back(slice); slices.push_back(slice);
} }
} }
String extension = get_save_extension();
Array formats_imported; Array formats_imported;
Ref<LayeredTextureImport> texture_import;
if (compress_mode == COMPRESS_VRAM_COMPRESSED) { texture_import.instantiate();
//must import in all formats, in order of priority (so platform choses the best supported one. IE, etc2 over etc). texture_import->csource = &csource;
//Android, GLES 2.x texture_import->save_path = p_save_path;
texture_import->options = p_options;
bool ok_on_pc = false; texture_import->platform_variants = r_platform_variants;
bool is_hdr = (image->get_format() >= Image::FORMAT_RF && image->get_format() <= Image::FORMAT_RGBE9995); texture_import->image = image;
bool is_ldr = (image->get_format() >= Image::FORMAT_L8 && image->get_format() <= Image::FORMAT_RGB565); texture_import->formats_imported = formats_imported;
bool can_bptc = ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_bptc"); texture_import->slices = &slices;
bool can_s3tc = ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_s3tc"); texture_import->compress_mode = compress_mode;
texture_import->lossy = lossy;
if (can_bptc) { texture_import->hdr_compression = hdr_compression;
formats_imported.push_back("bptc"); // Needs to be added anyway. texture_import->bptc_ldr = bptc_ldr;
} texture_import->mipmaps = mipmaps;
bool can_compress_hdr = hdr_compression > 0; texture_import->used_channels = used_channels;
_check_compress_stex(texture_import);
if (is_hdr && can_compress_hdr) {
if (used_channels == Image::USED_CHANNELS_LA || used_channels == Image::USED_CHANNELS_RGBA) {
//can compress hdr, but hdr with alpha is not compressible
if (hdr_compression == 2) {
//but user selected to compress hdr anyway, so force an alpha-less format.
if (image->get_format() == Image::FORMAT_RGBAF) {
for (int i = 0; i < slices.size(); i++) {
slices.write[i]->convert(Image::FORMAT_RGBF);
}
} else if (image->get_format() == Image::FORMAT_RGBAH) {
for (int i = 0; i < slices.size(); i++) {
slices.write[i]->convert(Image::FORMAT_RGBH);
}
}
} else {
can_compress_hdr = false;
}
}
if (can_compress_hdr) {
if (!can_bptc) {
//default to rgbe
if (image->get_format() != Image::FORMAT_RGBE9995) {
for (int i = 0; i < slices.size(); i++) {
slices.write[i]->convert(Image::FORMAT_RGBE9995);
}
}
}
} else {
can_bptc = false;
}
}
if (is_ldr && can_bptc) {
if (bptc_ldr == 0 || (bptc_ldr == 1 && !(used_channels == Image::USED_CHANNELS_LA || used_channels == Image::USED_CHANNELS_RGBA))) {
can_bptc = false;
}
}
if (can_bptc || can_s3tc) {
_save_tex(slices, p_save_path + ".s3tc." + extension, compress_mode, lossy, can_bptc ? Image::COMPRESS_BPTC : Image::COMPRESS_S3TC, csource, used_channels, mipmaps, false);
r_platform_variants->push_back("s3tc");
formats_imported.push_back("s3tc");
ok_on_pc = true;
}
if (ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_etc2")) {
_save_tex(slices, p_save_path + ".etc2." + extension, compress_mode, lossy, Image::COMPRESS_ETC2, csource, used_channels, mipmaps, true);
r_platform_variants->push_back("etc2");
formats_imported.push_back("etc2");
}
if (!ok_on_pc) {
EditorNode::add_io_error("Warning, no suitable PC VRAM compression enabled in Project Settings. This texture will not display correctly on PC.");
}
} else {
//import normally
_save_tex(slices, p_save_path + "." + extension, compress_mode, lossy, Image::COMPRESS_S3TC /* IGNORED */, csource, used_channels, mipmaps, false);
}
if (r_metadata) { if (r_metadata) {
Dictionary metadata; Dictionary metadata;
metadata["vram_texture"] = compress_mode == COMPRESS_VRAM_COMPRESSED; metadata["vram_texture"] = compress_mode == COMPRESS_VRAM_COMPRESSED;
@ -537,3 +474,76 @@ ResourceImporterLayeredTexture::ResourceImporterLayeredTexture() {
ResourceImporterLayeredTexture::~ResourceImporterLayeredTexture() { ResourceImporterLayeredTexture::~ResourceImporterLayeredTexture() {
} }
// Saves the slices described by `r_texture_import` in every format required by
// its compress mode.
//
// Outside of COMPRESS_VRAM_COMPRESSED a single file is written. In VRAM mode an
// "s3tc" variant (BPTC or S3TC data) and/or an "etc2" variant is written
// depending on the project settings, each written variant is appended to
// `platform_variants` and `formats_imported`, and an I/O warning is raised when
// no PC-compatible variant could be produced.
void ResourceImporterLayeredTexture::_check_compress_stex(Ref<LayeredTextureImport> r_texture_import) {
	// Validate everything that is dereferenced below before causing side effects.
	ERR_FAIL_COND(r_texture_import.is_null());
	ERR_FAIL_NULL(r_texture_import->csource);
	ERR_FAIL_NULL(r_texture_import->slices);

	const String extension = get_save_extension();
	Vector<Ref<Image>> &slices = *r_texture_import->slices;

	if (r_texture_import->compress_mode != COMPRESS_VRAM_COMPRESSED) {
		// Import normally.
		_save_tex(slices, r_texture_import->save_path + "." + extension, r_texture_import->compress_mode, r_texture_import->lossy, Image::COMPRESS_S3TC /* IGNORED */, *r_texture_import->csource, r_texture_import->used_channels, r_texture_import->mipmaps, false);
		return;
	}

	ERR_FAIL_NULL(r_texture_import->platform_variants);
	ERR_FAIL_COND(r_texture_import->image.is_null());

	// Must import in all formats, in order of priority (so the platform chooses the best supported one, i.e. etc2 over etc).
	// Android, GLES 2.x

	const Image::Format image_format = r_texture_import->image->get_format();
	const bool is_hdr = (image_format >= Image::FORMAT_RF && image_format <= Image::FORMAT_RGBE9995);
	const bool is_ldr = (image_format >= Image::FORMAT_L8 && image_format <= Image::FORMAT_RGB565);
	const bool has_alpha = (r_texture_import->used_channels == Image::USED_CHANNELS_LA || r_texture_import->used_channels == Image::USED_CHANNELS_RGBA);

	bool can_bptc = ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_bptc");
	const bool can_s3tc = ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_s3tc");

	if (can_bptc) {
		r_texture_import->formats_imported.push_back("bptc"); // BPTC needs to be added anyway.
	}

	bool can_compress_hdr = r_texture_import->hdr_compression > 0;

	// The HDR rules only apply to HDR sources; LDR images must keep `can_bptc` untouched here.
	if (is_hdr && can_compress_hdr) {
		if (has_alpha) {
			// Can compress hdr, but hdr with alpha is not compressible.
			if (r_texture_import->hdr_compression == 2) {
				// The user selected to compress hdr anyway, so force an alpha-less format.
				if (image_format == Image::FORMAT_RGBAF) {
					for (int i = 0; i < slices.size(); i++) {
						slices.write[i]->convert(Image::FORMAT_RGBF);
					}
				} else if (image_format == Image::FORMAT_RGBAH) {
					for (int i = 0; i < slices.size(); i++) {
						slices.write[i]->convert(Image::FORMAT_RGBH);
					}
				}
			} else {
				can_compress_hdr = false;
			}
		}

		if (can_compress_hdr) {
			if (!can_bptc) {
				// Default to RGBE.
				if (image_format != Image::FORMAT_RGBE9995) {
					for (int i = 0; i < slices.size(); i++) {
						slices.write[i]->convert(Image::FORMAT_RGBE9995);
					}
				}
			}
		} else {
			can_bptc = false;
		}
	}

	if (is_ldr && can_bptc) {
		// bptc_ldr == 0 disables BPTC for LDR; bptc_ldr == 1 only allows it when alpha is used.
		if (r_texture_import->bptc_ldr == 0 || (r_texture_import->bptc_ldr == 1 && !has_alpha)) {
			can_bptc = false;
		}
	}

	// Tracks whether a variant usable on PC was actually written.
	bool ok_on_pc = false;

	if (can_bptc || can_s3tc) {
		_save_tex(slices, r_texture_import->save_path + ".s3tc." + extension, r_texture_import->compress_mode, r_texture_import->lossy, can_bptc ? Image::COMPRESS_BPTC : Image::COMPRESS_S3TC, *r_texture_import->csource, r_texture_import->used_channels, r_texture_import->mipmaps, false);
		r_texture_import->platform_variants->push_back("s3tc");
		r_texture_import->formats_imported.push_back("s3tc");
		ok_on_pc = true;
	}

	if (ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_etc2")) {
		_save_tex(slices, r_texture_import->save_path + ".etc2." + extension, r_texture_import->compress_mode, r_texture_import->lossy, Image::COMPRESS_ETC2, *r_texture_import->csource, r_texture_import->used_channels, r_texture_import->mipmaps, true);
		r_texture_import->platform_variants->push_back("etc2");
		r_texture_import->formats_imported.push_back("etc2");
	}

	if (!ok_on_pc) {
		EditorNode::add_io_error("Warning, no suitable PC VRAM compression enabled in Project Settings. This texture will not display correctly on PC.");
	}
}

View file

@ -33,9 +33,30 @@
#include "core/io/image.h" #include "core/io/image.h"
#include "core/io/resource_importer.h" #include "core/io/resource_importer.h"
#include "core/object/ref_counted.h"
class StreamTexture2D; class StreamTexture2D;
// Parameter bundle passed to ResourceImporterLayeredTexture::_check_compress_stex().
// It is a plain collection of public fields with no behavior of its own.
class LayeredTextureImport : public RefCounted {
	GDCLASS(LayeredTextureImport, RefCounted);

public:
	// Non-owning pointers: this class never allocates or frees them.
	// NOTE(review): presumably they must outlive the object — confirm against the caller.
	Image::CompressSource *csource = nullptr;
	Vector<Ref<Image>> *slices = nullptr;
	List<String> *platform_variants = nullptr;

	// Source image and destination path (without extension) for the saved files.
	Ref<Image> image = nullptr;
	String save_path;

	// Import options and the list of formats that were written.
	Map<StringName, Variant> options;
	Array formats_imported;

	// Compression settings.
	int compress_mode = 0;
	float lossy = 1.0f;
	int hdr_compression = 0;
	int bptc_ldr = 0;
	bool mipmaps = true;
	Image::UsedChannels used_channels = Image::USED_CHANNELS_RGBA;

	virtual ~LayeredTextureImport() = default;
};
class ResourceImporterLayeredTexture : public ResourceImporter { class ResourceImporterLayeredTexture : public ResourceImporter {
GDCLASS(ResourceImporterLayeredTexture, ResourceImporter); GDCLASS(ResourceImporterLayeredTexture, ResourceImporter);
@ -66,6 +87,8 @@ protected:
static ResourceImporterLayeredTexture *singleton; static ResourceImporterLayeredTexture *singleton;
public: public:
void _check_compress_stex(Ref<LayeredTextureImport> r_texture_import);
static ResourceImporterLayeredTexture *get_singleton() { return singleton; } static ResourceImporterLayeredTexture *get_singleton() { return singleton; }
virtual String get_importer_name() const override; virtual String get_importer_name() const override;
virtual String get_visible_name() const override; virtual String get_visible_name() const override;

View file

@ -496,11 +496,10 @@ Error ResourceImporterTexture::import(const String &p_source_file, const String
//must import in all formats, in order of priority (so platform choses the best supported one. IE, etc2 over etc). //must import in all formats, in order of priority (so platform choses the best supported one. IE, etc2 over etc).
//Android, GLES 2.x //Android, GLES 2.x
bool ok_on_pc = false; const bool is_hdr = (image->get_format() >= Image::FORMAT_RF && image->get_format() <= Image::FORMAT_RGBE9995);
bool is_hdr = (image->get_format() >= Image::FORMAT_RF && image->get_format() <= Image::FORMAT_RGBE9995);
bool is_ldr = (image->get_format() >= Image::FORMAT_L8 && image->get_format() <= Image::FORMAT_RGB565); bool is_ldr = (image->get_format() >= Image::FORMAT_L8 && image->get_format() <= Image::FORMAT_RGB565);
bool can_bptc = ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_bptc"); const bool can_bptc = ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_bptc");
bool can_s3tc = ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_s3tc"); const bool can_s3tc = ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_s3tc");
if (can_bptc) { if (can_bptc) {
//add to the list anyway //add to the list anyway
@ -525,29 +524,24 @@ Error ResourceImporterTexture::import(const String &p_source_file, const String
} }
} }
if (can_compress_hdr) { if (!can_compress_hdr) {
if (!can_bptc) { //fallback to RGBE99995
//fallback to RGBE99995 if (image->get_format() != Image::FORMAT_RGBE9995) {
if (image->get_format() != Image::FORMAT_RGBE9995) { image->convert(Image::FORMAT_RGBE9995);
image->convert(Image::FORMAT_RGBE9995);
}
} }
} else {
can_bptc = false;
}
}
if (is_ldr && can_bptc) {
if (bptc_ldr == 0 || (bptc_ldr == 1 && !has_alpha)) {
can_bptc = false;
} }
} }
bool ok_on_pc = false;
if (can_bptc || can_s3tc) { if (can_bptc || can_s3tc) {
_save_stex(image, p_save_path + ".s3tc.stex", compress_mode, lossy, can_bptc ? Image::COMPRESS_BPTC : Image::COMPRESS_S3TC, mipmaps, stream, detect_3d, detect_roughness, detect_normal, force_normal, srgb_friendly_pack, false, mipmap_limit, normal_image, roughness_channel); ok_on_pc = true;
Image::CompressMode image_compress_mode = Image::COMPRESS_BPTC;
if (!bptc_ldr && can_s3tc && is_ldr) {
image_compress_mode = Image::COMPRESS_S3TC;
}
_save_stex(image, p_save_path + ".s3tc.stex", compress_mode, lossy, image_compress_mode, mipmaps, stream, detect_3d, detect_roughness, detect_normal, force_normal, srgb_friendly_pack, false, mipmap_limit, normal_image, roughness_channel);
r_platform_variants->push_back("s3tc"); r_platform_variants->push_back("s3tc");
formats_imported.push_back("s3tc"); formats_imported.push_back("s3tc");
ok_on_pc = true;
} }
if (ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_etc2")) { if (ProjectSettings::get_singleton()->get("rendering/textures/vram_compression/import_etc2")) {

View file

@ -11,7 +11,16 @@ thirdparty_obj = []
thirdparty_dir = "#thirdparty/cvtt/" thirdparty_dir = "#thirdparty/cvtt/"
thirdparty_sources = [ thirdparty_sources = [
"ConvectionKernels.cpp", "ConvectionKernels_API.cpp",
"ConvectionKernels_ETC.cpp",
"ConvectionKernels_BC67.cpp",
"ConvectionKernels_IndexSelector.cpp",
"ConvectionKernels_BC6H_IO.cpp",
"ConvectionKernels_S3TC.cpp",
"ConvectionKernels_BC7_PrioData.cpp",
"ConvectionKernels_SingleFile.cpp",
"ConvectionKernels_BCCommon.cpp",
"ConvectionKernels_Util.cpp",
] ]
thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]

View file

@ -41,7 +41,7 @@ struct CVTTCompressionJobParams {
bool is_hdr = false; bool is_hdr = false;
bool is_signed = false; bool is_signed = false;
int bytes_per_pixel = 0; int bytes_per_pixel = 0;
cvtt::BC7EncodingPlan bc7_plan;
cvtt::Options options; cvtt::Options options;
}; };
@ -116,7 +116,7 @@ static void _digest_row_task(const CVTTCompressionJobParams &p_job_params, const
cvtt::Kernels::EncodeBC6HU(output_blocks, input_blocks_hdr, p_job_params.options); cvtt::Kernels::EncodeBC6HU(output_blocks, input_blocks_hdr, p_job_params.options);
} }
} else { } else {
cvtt::Kernels::EncodeBC7(output_blocks, input_blocks_ldr, p_job_params.options); cvtt::Kernels::EncodeBC7(output_blocks, input_blocks_ldr, p_job_params.options, p_job_params.bc7_plan);
} }
unsigned int num_real_blocks = ((w - x_start) + 3) / 4; unsigned int num_real_blocks = ((w - x_start) + 3) / 4;
@ -141,7 +141,6 @@ void image_compress_cvtt(Image *p_image, float p_lossy_quality, Image::UsedChann
if (p_image->get_format() >= Image::FORMAT_BPTC_RGBA) { if (p_image->get_format() >= Image::FORMAT_BPTC_RGBA) {
return; //do not compress, already compressed return; //do not compress, already compressed
} }
int w = p_image->get_width(); int w = p_image->get_width();
int h = p_image->get_height(); int h = p_image->get_height();
@ -153,22 +152,8 @@ void image_compress_cvtt(Image *p_image, float p_lossy_quality, Image::UsedChann
} }
cvtt::Options options; cvtt::Options options;
uint32_t flags = cvtt::Flags::Fastest; uint32_t flags = cvtt::Flags::Default;
if (p_lossy_quality > 0.85) {
flags = cvtt::Flags::Ultra;
} else if (p_lossy_quality > 0.75) {
flags = cvtt::Flags::Better;
} else if (p_lossy_quality > 0.55) {
flags = cvtt::Flags::Default;
} else if (p_lossy_quality > 0.35) {
flags = cvtt::Flags::Fast;
} else if (p_lossy_quality > 0.15) {
flags = cvtt::Flags::Faster;
}
flags |= cvtt::Flags::BC7_RespectPunchThrough; flags |= cvtt::Flags::BC7_RespectPunchThrough;
if (p_channels == Image::USED_CHANNELS_RG) { //guessing this is a normal map if (p_channels == Image::USED_CHANNELS_RG) { //guessing this is a normal map
flags |= cvtt::Flags::Uniform; flags |= cvtt::Flags::Uniform;
} }
@ -215,12 +200,15 @@ void image_compress_cvtt(Image *p_image, float p_lossy_quality, Image::UsedChann
job_queue.job_params.is_signed = is_signed; job_queue.job_params.is_signed = is_signed;
job_queue.job_params.options = options; job_queue.job_params.options = options;
job_queue.job_params.bytes_per_pixel = is_hdr ? 6 : 4; job_queue.job_params.bytes_per_pixel = is_hdr ? 6 : 4;
cvtt::Kernels::ConfigureBC7EncodingPlanFromQuality(job_queue.job_params.bc7_plan, 5);
#ifdef NO_THREADS
int num_job_threads = 0; int num_job_threads = 0;
#else // Amdahl's law (Wikipedia)
int num_job_threads = OS::get_singleton()->can_use_threads() ? (OS::get_singleton()->get_processor_count() - 1) : 0; // If a program needs 20 hours to complete using a single thread, but a one-hour portion of the program cannot be parallelized,
#endif // therefore only the remaining 19 hours (p = 0.95) of execution time can be parallelized, then regardless of how many threads are devoted
// to a parallelized execution of this program, the minimum execution time cannot be less than one hour.
//
// The number of executions with different inputs can be increased while the latency is the same.
Vector<CVTTCompressionRowTask> tasks; Vector<CVTTCompressionRowTask> tasks;
@ -278,7 +266,6 @@ void image_compress_cvtt(Image *p_image, float p_lossy_quality, Image::UsedChann
memdelete(threads_wb[i]); memdelete(threads_wb[i]);
} }
} }
p_image->create(p_image->get_width(), p_image->get_height(), p_image->has_mipmaps(), target_format, data); p_image->create(p_image->get_width(), p_image->get_height(), p_image->has_mipmaps(), target_format, data);
} }
@ -388,6 +375,5 @@ void image_decompress_cvtt(Image *p_image) {
w >>= 1; w >>= 1;
h >>= 1; h >>= 1;
} }
p_image->create(p_image->get_width(), p_image->get_height(), p_image->has_mipmaps(), target_format, data); p_image->create(p_image->get_width(), p_image->get_height(), p_image->has_mipmaps(), target_format, data);
} }

View file

@ -52,13 +52,13 @@ Includes some patches in the `patches` folder which have been sent upstream.
## cvtt ## cvtt
- Upstream: https://github.com/elasota/cvtt - Upstream: https://github.com/elasota/ConvectionKernels
- Version: 1.0.0-beta4 (cc8472a04ba110fe999c686d07af40f7839051fd, 2018) - Version: git (dc2dbbe0ae2cf2be06ef56d1021e2222a56c7fe2, 2021)
- License: MIT - License: MIT
Files extracted from upstream source: Files extracted from upstream source:
- all .cpp, .h, and .txt files in ConvectionKernels/ - all .cpp, .h, and .txt files except the folders MakeTables and etc2packer.
## doctest ## doctest

File diff suppressed because it is too large Load diff

View file

@ -25,21 +25,13 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#ifndef __CVTT_CONVECTION_KERNELS__ #ifndef __CVTT_CONVECTION_KERNELS__
#define __CVTT_CONVECTION_KERNELS__ #define __CVTT_CONVECTION_KERNELS__
#include <stddef.h>
#include <stdint.h> #include <stdint.h>
namespace cvtt namespace cvtt
{ {
namespace Flags namespace Flags
{ {
// Enable partitioned modes in BC7 encoding (slower, better quality)
const uint32_t BC7_EnablePartitioning = 0x001;
// Enable 3-partition modes in BC7 encoding (slower, better quality, requires BC7_EnablePartitioning)
const uint32_t BC7_Enable3Subsets = 0x002;
// Enable dual-plane modes in BC7 encoding (slower, better quality)
const uint32_t BC7_EnableDualPlane = 0x004;
// Use fast indexing in BC7 encoding (about 2x faster, slightly worse quality) // Use fast indexing in BC7 encoding (about 2x faster, slightly worse quality)
const uint32_t BC7_FastIndexing = 0x008; const uint32_t BC7_FastIndexing = 0x008;
@ -61,13 +53,19 @@ namespace cvtt
// Uniform color channel importance // Uniform color channel importance
const uint32_t Uniform = 0x200; const uint32_t Uniform = 0x200;
// Use fake BT.709 color space for etc2comp compatibility (slower)
const uint32_t ETC_UseFakeBT709 = 0x400;
// Use accurate quantization functions when quantizing fake BT.709 (much slower, marginal improvement on specific blocks)
const uint32_t ETC_FakeBT709Accurate = 0x800;
// Misc useful default flag combinations // Misc useful default flag combinations
const uint32_t Fastest = (BC6H_FastIndexing | S3TC_Paranoid); const uint32_t Fastest = (BC6H_FastIndexing | BC7_FastIndexing | S3TC_Paranoid);
const uint32_t Faster = (BC7_EnableDualPlane | BC6H_FastIndexing | S3TC_Paranoid); const uint32_t Faster = (BC6H_FastIndexing | BC7_FastIndexing | S3TC_Paranoid);
const uint32_t Fast = (BC7_EnablePartitioning | BC7_EnableDualPlane | BC7_FastIndexing | S3TC_Paranoid); const uint32_t Fast = (BC7_FastIndexing | S3TC_Paranoid);
const uint32_t Default = (BC7_EnablePartitioning | BC7_EnableDualPlane | BC7_Enable3Subsets | BC7_FastIndexing | S3TC_Paranoid); const uint32_t Default = (BC7_FastIndexing | S3TC_Paranoid);
const uint32_t Better = (BC7_EnablePartitioning | BC7_EnableDualPlane | BC7_Enable3Subsets | S3TC_Paranoid | S3TC_Exhaustive); const uint32_t Better = (S3TC_Paranoid | S3TC_Exhaustive);
const uint32_t Ultra = (BC7_EnablePartitioning | BC7_EnableDualPlane | BC7_Enable3Subsets | BC7_TrySingleColor | S3TC_Paranoid | S3TC_Exhaustive); const uint32_t Ultra = (BC7_TrySingleColor | S3TC_Paranoid | S3TC_Exhaustive | ETC_FakeBT709Accurate);
} }
const unsigned int NumParallelBlocks = 8; const unsigned int NumParallelBlocks = 8;
@ -81,7 +79,7 @@ namespace cvtt
float blueWeight; // Blue channel importance float blueWeight; // Blue channel importance
float alphaWeight; // Alpha channel importance float alphaWeight; // Alpha channel importance
int refineRoundsBC7; // Number of refine rounds for BC7 int refineRoundsBC7; // Number of refine rounds for BC7
int refineRoundsBC6H; // Number of refine rounds for BC6H (max 3) int refineRoundsBC6H; // Number of refine rounds for BC6H (max 3)
int refineRoundsIIC; // Number of refine rounds for independent interpolated channels (BC3 alpha, BC4, BC5) int refineRoundsIIC; // Number of refine rounds for independent interpolated channels (BC3 alpha, BC4, BC5)
int refineRoundsS3TC; // Number of refine rounds for S3TC RGB int refineRoundsS3TC; // Number of refine rounds for S3TC RGB
@ -104,6 +102,102 @@ namespace cvtt
} }
}; };
// Fine-tuning input for BC7 encoding plans (consumed by
// Kernels::ConfigureBC7EncodingPlanFromFineTuningParams).
// A default-constructed instance has every seed point count set to 4.
struct BC7FineTuningParams
{
    // Seed point counts for each mode+configuration combination
    uint8_t mode0SP[16];
    uint8_t mode1SP[64];
    uint8_t mode2SP[64];
    uint8_t mode3SP[64];
    uint8_t mode4SP[4][2];
    uint8_t mode5SP[4];
    uint8_t mode6SP;
    uint8_t mode7SP[64];

    // Initializes every entry of every table to the default seed point count.
    BC7FineTuningParams()
    {
        const uint8_t defaultSeedPoints = 4;

        mode6SP = defaultSeedPoints;

        for (int cfg = 0; cfg < 4; ++cfg)
        {
            mode4SP[cfg][0] = defaultSeedPoints;
            mode4SP[cfg][1] = defaultSeedPoints;
            mode5SP[cfg] = defaultSeedPoints;
        }

        for (int cfg = 0; cfg < 16; ++cfg)
            mode0SP[cfg] = defaultSeedPoints;

        for (int cfg = 0; cfg < 64; ++cfg)
        {
            mode1SP[cfg] = defaultSeedPoints;
            mode2SP[cfg] = defaultSeedPoints;
            mode3SP[cfg] = defaultSeedPoints;
            mode7SP[cfg] = defaultSeedPoints;
        }
    }
};
// Describes which BC7 partitions/shapes are evaluated and how many seed points each gets.
// Passed to Kernels::EncodeBC7; can be filled in by the ConfigureBC7EncodingPlan* functions.
// A default-constructed plan has every partition bit set, mode 6 enabled, identity shape
// lists covering all shapes, and 4 seed points everywhere.
struct BC7EncodingPlan
{
    static const int kNumRGBAShapes = 129;
    static const int kNumRGBShapes = 243;

    // Per-mode partition enable masks (all bits set by default).
    uint64_t mode1PartitionEnabled;
    uint64_t mode2PartitionEnabled;
    uint64_t mode3PartitionEnabled;
    uint16_t mode0PartitionEnabled;
    uint64_t mode7RGBAPartitionEnabled;
    uint64_t mode7RGBPartitionEnabled;

    // Seed point counts for modes 4 and 5, and the mode 6 on/off switch.
    uint8_t mode4SP[4][2];
    uint8_t mode5SP[4];
    bool mode6Enabled;

    // Seed point count per shape.
    uint8_t seedPointsForShapeRGB[kNumRGBShapes];
    uint8_t seedPointsForShapeRGBA[kNumRGBAShapes];

    // Shape lists plus the number of leading entries to evaluate.
    uint8_t rgbaShapeList[kNumRGBAShapes];
    uint8_t rgbaNumShapesToEvaluate;
    uint8_t rgbShapeList[kNumRGBShapes];
    uint8_t rgbNumShapesToEvaluate;

    // Builds the default plan described above.
    BC7EncodingPlan()
    {
        const uint64_t allBits64 = 0xffffffffffffffffULL;
        const uint8_t defaultSeedPoints = 4;

        mode0PartitionEnabled = 0xffff;
        mode1PartitionEnabled = allBits64;
        mode2PartitionEnabled = allBits64;
        mode3PartitionEnabled = allBits64;
        mode7RGBPartitionEnabled = allBits64;
        mode7RGBAPartitionEnabled = allBits64;
        mode6Enabled = true;

        for (int cfg = 0; cfg < 4; ++cfg)
        {
            mode4SP[cfg][0] = defaultSeedPoints;
            mode4SP[cfg][1] = defaultSeedPoints;
            mode5SP[cfg] = defaultSeedPoints;
        }

        // Identity shape lists: entry N refers to shape N.
        rgbNumShapesToEvaluate = kNumRGBShapes;
        for (int shape = 0; shape < kNumRGBShapes; ++shape)
        {
            rgbShapeList[shape] = static_cast<uint8_t>(shape);
            seedPointsForShapeRGB[shape] = defaultSeedPoints;
        }

        rgbaNumShapesToEvaluate = kNumRGBAShapes;
        for (int shape = 0; shape < kNumRGBAShapes; ++shape)
        {
            rgbaShapeList[shape] = static_cast<uint8_t>(shape);
            seedPointsForShapeRGBA[shape] = defaultSeedPoints;
        }
    }
};
// RGBA input block for unsigned 8-bit formats // RGBA input block for unsigned 8-bit formats
struct PixelBlockU8 struct PixelBlockU8
{ {
@ -116,14 +210,34 @@ namespace cvtt
int8_t m_pixels[16][4]; int8_t m_pixels[16][4];
}; };
// Single-channel input block: one signed 16-bit value for each of the 16 pixels.
// This is the block type accepted by Kernels::EncodeETC2Alpha11.
struct PixelBlockScalarS16
{
    int16_t m_pixels[16];
};
// RGBA input block for half-precision float formats (bit-cast to int16_t) // RGBA input block for half-precision float formats (bit-cast to int16_t)
struct PixelBlockF16 struct PixelBlockF16
{ {
int16_t m_pixels[16][4]; int16_t m_pixels[16][4];
}; };
// Handle type for the temporary storage used by the ETC2 encoders.
// The constructor is protected, so the class cannot be instantiated directly;
// instances are obtained from Kernels::AllocETC2Data and handed back to
// Kernels::ReleaseETC2Data.
class ETC2CompressionData
{
protected:
    ETC2CompressionData() {}
};
// Handle type for the temporary storage used by the ETC1 encoder.
// The constructor is protected, so the class cannot be instantiated directly;
// instances are obtained from Kernels::AllocETC1Data and handed back to
// Kernels::ReleaseETC1Data.
class ETC1CompressionData
{
protected:
    ETC1CompressionData() {}
};
namespace Kernels namespace Kernels
{ {
typedef void* allocFunc_t(void *context, size_t size);
typedef void freeFunc_t(void *context, void* ptr, size_t size);
// NOTE: All functions accept and output NumParallelBlocks blocks at once // NOTE: All functions accept and output NumParallelBlocks blocks at once
void EncodeBC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options); void EncodeBC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
void EncodeBC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options); void EncodeBC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options);
@ -134,7 +248,28 @@ namespace cvtt
void EncodeBC5S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options); void EncodeBC5S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options);
void EncodeBC6HU(uint8_t *pBC, const PixelBlockF16 *pBlocks, const Options &options); void EncodeBC6HU(uint8_t *pBC, const PixelBlockF16 *pBlocks, const Options &options);
void EncodeBC6HS(uint8_t *pBC, const PixelBlockF16 *pBlocks, const Options &options); void EncodeBC6HS(uint8_t *pBC, const PixelBlockF16 *pBlocks, const Options &options);
void EncodeBC7(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options); void EncodeBC7(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, const BC7EncodingPlan &encodingPlan);
void EncodeETC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, ETC1CompressionData *compressionData);
void EncodeETC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options, ETC2CompressionData *compressionData);
void EncodeETC2RGBA(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData);
void EncodeETC2PunchthroughAlpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData);
void EncodeETC2Alpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options);
void EncodeETC2Alpha11(uint8_t *pBC, const PixelBlockScalarS16 *pBlocks, bool isSigned, const cvtt::Options &options);
// Generates a BC7 encoding plan from a quality parameter that ranges from 1 (fastest) to 100 (best)
void ConfigureBC7EncodingPlanFromQuality(BC7EncodingPlan &encodingPlan, int quality);
// Generates a BC7 encoding plan from fine-tuning parameters.
bool ConfigureBC7EncodingPlanFromFineTuningParams(BC7EncodingPlan &encodingPlan, const BC7FineTuningParams &params);
// ETC compression requires temporary storage that normally consumes a large amount of stack space.
// To allocate and release it, use one of these functions.
ETC2CompressionData *AllocETC2Data(allocFunc_t allocFunc, void *context, const cvtt::Options &options);
void ReleaseETC2Data(ETC2CompressionData *compressionData, freeFunc_t freeFunc);
ETC1CompressionData *AllocETC1Data(allocFunc_t allocFunc, void *context);
void ReleaseETC1Data(ETC1CompressionData *compressionData, freeFunc_t freeFunc);
void DecodeBC6HU(PixelBlockF16 *pBlocks, const uint8_t *pBC); void DecodeBC6HU(PixelBlockF16 *pBlocks, const uint8_t *pBC);
void DecodeBC6HS(PixelBlockF16 *pBlocks, const uint8_t *pBC); void DecodeBC6HS(PixelBlockF16 *pBlocks, const uint8_t *pBC);

View file

@ -0,0 +1,346 @@
/*
Convection Texture Tools
Copyright (c) 2018-2019 Eric Lasota
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject
to the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#include "ConvectionKernels_Config.h"
#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)
#include <stdint.h>
#include "ConvectionKernels.h"
#include "ConvectionKernels_Util.h"
#include "ConvectionKernels_BC67.h"
#include "ConvectionKernels_ETC.h"
#include "ConvectionKernels_S3TC.h"
#include <assert.h>
namespace cvtt
{
namespace Kernels
{
// Encodes cvtt::NumParallelBlocks input blocks as BC7, writing 16 bytes per block to pBC.
// The blocks are handed to BC7Computer::Pack in batches of ParallelMath::ParallelSize,
// together with the channel weights derived from options and the caller's encoding plan.
// Both pointers must be non-null (checked with assert).
void EncodeBC7(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, const BC7EncodingPlan &encodingPlan)
{
    assert(pBlocks);
    assert(pBC);

    float weights[4];
    Util::FillWeights(options, weights);

    uint8_t *dst = pBC;
    size_t firstBlock = 0;
    while (firstBlock < cvtt::NumParallelBlocks)
    {
        Internal::BC7Computer::Pack(options.flags, pBlocks + firstBlock, dst, weights, encodingPlan, options.refineRoundsBC7);

        // Advance past the batch that was just written.
        dst += ParallelMath::ParallelSize * 16;
        firstBlock += ParallelMath::ParallelSize;
    }
}
// Encodes cvtt::NumParallelBlocks half-float blocks as BC6H with isSigned = false,
// writing 16 bytes per block to pBC. options.seedPoints is forwarded in the argument
// position that BC6HComputer::Pack declares as numTweakRounds.
// Both pointers must be non-null (checked with assert).
void EncodeBC6HU(uint8_t *pBC, const PixelBlockF16 *pBlocks, const cvtt::Options &options)
{
    assert(pBlocks);
    assert(pBC);

    float weights[4];
    Util::FillWeights(options, weights);

    const bool isSigned = false;
    uint8_t *dst = pBC;
    size_t firstBlock = 0;
    while (firstBlock < cvtt::NumParallelBlocks)
    {
        Internal::BC6HComputer::Pack(options.flags, pBlocks + firstBlock, dst, weights, isSigned, options.seedPoints, options.refineRoundsBC6H);

        dst += ParallelMath::ParallelSize * 16;
        firstBlock += ParallelMath::ParallelSize;
    }
}
// Encodes cvtt::NumParallelBlocks half-float blocks as BC6H with isSigned = true,
// writing 16 bytes per block to pBC. options.seedPoints is forwarded in the argument
// position that BC6HComputer::Pack declares as numTweakRounds.
// Both pointers must be non-null (checked with assert).
void EncodeBC6HS(uint8_t *pBC, const PixelBlockF16 *pBlocks, const cvtt::Options &options)
{
    assert(pBlocks);
    assert(pBC);

    float weights[4];
    Util::FillWeights(options, weights);

    const bool isSigned = true;
    uint8_t *dst = pBC;
    size_t firstBlock = 0;
    while (firstBlock < cvtt::NumParallelBlocks)
    {
        Internal::BC6HComputer::Pack(options.flags, pBlocks + firstBlock, dst, weights, isSigned, options.seedPoints, options.refineRoundsBC6H);

        dst += ParallelMath::ParallelSize * 16;
        firstBlock += ParallelMath::ParallelSize;
    }
}
// Encodes cvtt::NumParallelBlocks blocks as BC1, writing 8 bytes per block to pBC.
// The RGB packer is called with an output stride of 8, options.threshold, and the
// exhaustive switch taken from the S3TC_Exhaustive flag.
// Both pointers must be non-null (checked with assert).
void EncodeBC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options)
{
    assert(pBlocks);
    assert(pBC);

    float weights[4];
    Util::FillWeights(options, weights);

    const bool exhaustive = (options.flags & Flags::S3TC_Exhaustive) != 0;

    uint8_t *dst = pBC;
    size_t firstBlock = 0;
    while (firstBlock < cvtt::NumParallelBlocks)
    {
        Internal::S3TCComputer::PackRGB(options.flags, pBlocks + firstBlock, dst, 8, weights, true, options.threshold, exhaustive, options.seedPoints, options.refineRoundsS3TC);

        dst += ParallelMath::ParallelSize * 8;
        firstBlock += ParallelMath::ParallelSize;
    }
}
void EncodeBC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options)
{
assert(pBlocks);
assert(pBC);
float channelWeights[4];
Util::FillWeights(options, channelWeights);
for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
{
Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC + 8, 16, channelWeights, false, 1.0f, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC);
Internal::S3TCComputer::PackExplicitAlpha(options.flags, pBlocks + blockBase, 3, pBC, 16);
pBC += ParallelMath::ParallelSize * 16;
}
}
void EncodeBC3(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options)
{
assert(pBlocks);
assert(pBC);
float channelWeights[4];
Util::FillWeights(options, channelWeights);
for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
{
Internal::S3TCComputer::PackRGB(options.flags, pBlocks + blockBase, pBC + 8, 16, channelWeights, false, 1.0f, (options.flags & Flags::S3TC_Exhaustive) != 0, options.seedPoints, options.refineRoundsS3TC);
Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 3, pBC, 16, false, options.seedPoints, options.refineRoundsIIC);
pBC += ParallelMath::ParallelSize * 16;
}
}
// Encodes NumParallelBlocks blocks as unsigned BC4, writing 8 bytes per block to pBC.
// The interpolated-alpha packer is called with 0 as its third argument (presumably the
// source channel index - confirm against S3TCComputer) and an output stride of 8.
// No channel weights are prepared here because the packer call takes none.
// NOTE(review): this assumes Util::FillWeights only fills its output array.
// Both pointers must be non-null (checked with assert).
void EncodeBC4U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options)
{
    assert(pBlocks);
    assert(pBC);

    for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
    {
        Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 0, pBC, 8, false, options.seedPoints, options.refineRoundsIIC);
        pBC += ParallelMath::ParallelSize * 8;
    }
}
// Encodes NumParallelBlocks signed blocks as signed BC4, writing 8 bytes per block to pBC.
// Each batch is first converted into a scratch array of unsigned blocks by
// Util::BiasSignedInput, then packed with the packer's signed switch set to true.
// No channel weights are prepared here because the packer call takes none.
// NOTE(review): this assumes Util::FillWeights only fills its output array.
// Both pointers must be non-null (checked with assert).
void EncodeBC4S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options)
{
    assert(pBlocks);
    assert(pBC);

    for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
    {
        PixelBlockU8 inputBlocks[ParallelMath::ParallelSize];
        Util::BiasSignedInput(inputBlocks, pBlocks + blockBase);

        Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 0, pBC, 8, true, options.seedPoints, options.refineRoundsIIC);
        pBC += ParallelMath::ParallelSize * 8;
    }
}
// Encodes NumParallelBlocks blocks as unsigned BC5, writing 16 bytes per block to pBC.
// Two interpolated-alpha passes run per batch: the pass with third argument 0 writes at
// offset 0 and the pass with third argument 1 writes at offset 8, both with stride 16.
// No channel weights are prepared here because the packer calls take none.
// NOTE(review): this assumes Util::FillWeights only fills its output array.
// Both pointers must be non-null (checked with assert).
void EncodeBC5U(uint8_t *pBC, const PixelBlockU8 *pBlocks, const Options &options)
{
    assert(pBlocks);
    assert(pBC);

    for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
    {
        Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 0, pBC, 16, false, options.seedPoints, options.refineRoundsIIC);
        Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, pBlocks + blockBase, 1, pBC + 8, 16, false, options.seedPoints, options.refineRoundsIIC);
        pBC += ParallelMath::ParallelSize * 16;
    }
}
// Encodes NumParallelBlocks signed blocks as signed BC5, writing 16 bytes per block to pBC.
// Each batch is first converted into a scratch array of unsigned blocks by
// Util::BiasSignedInput. Two interpolated-alpha passes then run with the signed switch
// set to true: third argument 0 writes at offset 0, third argument 1 at offset 8.
// No channel weights are prepared here because the packer calls take none.
// NOTE(review): this assumes Util::FillWeights only fills its output array.
// Both pointers must be non-null (checked with assert).
void EncodeBC5S(uint8_t *pBC, const PixelBlockS8 *pBlocks, const Options &options)
{
    assert(pBlocks);
    assert(pBC);

    for (size_t blockBase = 0; blockBase < NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
    {
        PixelBlockU8 inputBlocks[ParallelMath::ParallelSize];
        Util::BiasSignedInput(inputBlocks, pBlocks + blockBase);

        Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 0, pBC, 16, true, options.seedPoints, options.refineRoundsIIC);
        Internal::S3TCComputer::PackInterpolatedAlpha(options.flags, inputBlocks, 1, pBC + 8, 16, true, options.seedPoints, options.refineRoundsIIC);
        pBC += ParallelMath::ParallelSize * 16;
    }
}
// Encodes cvtt::NumParallelBlocks blocks as ETC1, writing 8 bytes per block to pBC.
// compressionData is the scratch storage obtained from AllocETC1Data; it is passed
// through to ETCComputer::CompressETC1Block unchanged.
// The compressor receives options directly, so no channel weight array is prepared here.
// NOTE(review): this assumes Util::FillWeights only fills its output array.
// pBlocks and pBC must be non-null (checked with assert).
void EncodeETC1(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC1CompressionData *compressionData)
{
    assert(pBlocks);
    assert(pBC);

    for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
    {
        Internal::ETCComputer::CompressETC1Block(pBC, pBlocks + blockBase, compressionData, options);
        pBC += ParallelMath::ParallelSize * 8;
    }
}
// Encodes cvtt::NumParallelBlocks blocks as ETC2, writing 8 bytes per block to pBC.
// Calls ETCComputer::CompressETC2Block with its final boolean argument set to false
// (EncodeETC2PunchthroughAlpha is the variant that passes true).
// compressionData is the scratch storage obtained from AllocETC2Data.
// The compressor receives options directly, so no channel weight array is prepared here.
// NOTE(review): this assumes Util::FillWeights only fills its output array.
// pBlocks and pBC must be non-null (checked with assert).
void EncodeETC2(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData)
{
    assert(pBlocks);
    assert(pBC);

    for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
    {
        Internal::ETCComputer::CompressETC2Block(pBC, pBlocks + blockBase, compressionData, options, false);
        pBC += ParallelMath::ParallelSize * 8;
    }
}
// Encodes cvtt::NumParallelBlocks blocks as ETC2 with punchthrough alpha, writing
// 8 bytes per block to pBC. Calls ETCComputer::CompressETC2Block with its final boolean
// argument set to true (EncodeETC2 is the variant that passes false).
// compressionData is the scratch storage obtained from AllocETC2Data.
// The compressor receives options directly, so no channel weight array is prepared here.
// NOTE(review): this assumes Util::FillWeights only fills its output array.
// pBlocks and pBC must be non-null (checked with assert).
void EncodeETC2PunchthroughAlpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData)
{
    assert(pBlocks);
    assert(pBC);

    for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase += ParallelMath::ParallelSize)
    {
        Internal::ETCComputer::CompressETC2Block(pBC, pBlocks + blockBase, compressionData, options, true);
        pBC += ParallelMath::ParallelSize * 8;
    }
}
// Encodes cvtt::NumParallelBlocks blocks with ETCComputer::CompressETC2AlphaBlock,
// writing 8 bytes per block to pBC. EncodeETC2RGBA uses this output as the alpha half
// of its 16-byte blocks.
// Both pointers must be non-null (checked with assert).
void EncodeETC2Alpha(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options)
{
    assert(pBlocks);
    assert(pBC);

    uint8_t *dst = pBC;
    size_t firstBlock = 0;
    while (firstBlock < cvtt::NumParallelBlocks)
    {
        Internal::ETCComputer::CompressETC2AlphaBlock(dst, pBlocks + firstBlock, options);

        dst += ParallelMath::ParallelSize * 8;
        firstBlock += ParallelMath::ParallelSize;
    }
}
// Encodes cvtt::NumParallelBlocks single-channel 16-bit blocks with
// ETCComputer::CompressEACBlock, writing 8 bytes per block to pBC.
// isSigned is forwarded to the compressor unchanged.
// Both pointers must be non-null (checked with assert).
void EncodeETC2Alpha11(uint8_t *pBC, const PixelBlockScalarS16 *pBlocks, bool isSigned, const cvtt::Options &options)
{
    assert(pBlocks);
    assert(pBC);

    uint8_t *dst = pBC;
    size_t firstBlock = 0;
    while (firstBlock < cvtt::NumParallelBlocks)
    {
        Internal::ETCComputer::CompressEACBlock(dst, pBlocks + firstBlock, isSigned, options);

        dst += ParallelMath::ParallelSize * 8;
        firstBlock += ParallelMath::ParallelSize;
    }
}
// Encodes cvtt::NumParallelBlocks blocks as ETC2 RGBA, writing 16 bytes per block to pBC.
// The color and alpha halves are encoded separately into temporary buffers (via EncodeETC2
// and EncodeETC2Alpha) and then interleaved: bytes 0-7 of each output block hold the alpha
// data and bytes 8-15 hold the color data.
// pBlocks and pBC must be non-null (checked with assert, matching the other encoders).
void EncodeETC2RGBA(uint8_t *pBC, const PixelBlockU8 *pBlocks, const cvtt::Options &options, cvtt::ETC2CompressionData *compressionData)
{
    assert(pBlocks);
    assert(pBC);

    uint8_t alphaBlockData[cvtt::NumParallelBlocks * 8];
    uint8_t colorBlockData[cvtt::NumParallelBlocks * 8];

    EncodeETC2(colorBlockData, pBlocks, options, compressionData);
    EncodeETC2Alpha(alphaBlockData, pBlocks, options);

    for (size_t blockBase = 0; blockBase < cvtt::NumParallelBlocks; blockBase++)
    {
        // Alpha half first, color half second.
        for (size_t blockData = 0; blockData < 8; blockData++)
            pBC[blockBase * 16 + blockData] = alphaBlockData[blockBase * 8 + blockData];

        for (size_t blockData = 0; blockData < 8; blockData++)
            pBC[blockBase * 16 + 8 + blockData] = colorBlockData[blockBase * 8 + blockData];
    }
}
// Decodes cvtt::NumParallelBlocks BC7 blocks (16 bytes each, read consecutively from pBC)
// into pBlocks, one BC7Computer::UnpackOne call per block.
// Both pointers must be non-null (checked with assert).
void DecodeBC7(PixelBlockU8 *pBlocks, const uint8_t *pBC)
{
    assert(pBlocks);
    assert(pBC);

    const uint8_t *packed = pBC;
    size_t index = 0;
    while (index < cvtt::NumParallelBlocks)
    {
        Internal::BC7Computer::UnpackOne(pBlocks[index], packed);
        packed += 16;
        ++index;
    }
}
// Decodes cvtt::NumParallelBlocks BC6H blocks (16 bytes each, read consecutively from pBC)
// into pBlocks, calling BC6HComputer::UnpackOne with isSigned = false for each block.
// Both pointers must be non-null (checked with assert).
void DecodeBC6HU(PixelBlockF16 *pBlocks, const uint8_t *pBC)
{
    assert(pBlocks);
    assert(pBC);

    const bool isSigned = false;
    const uint8_t *packed = pBC;
    size_t index = 0;
    while (index < cvtt::NumParallelBlocks)
    {
        Internal::BC6HComputer::UnpackOne(pBlocks[index], packed, isSigned);
        packed += 16;
        ++index;
    }
}
// Decodes cvtt::NumParallelBlocks BC6H blocks (16 bytes each, read consecutively from pBC)
// into pBlocks, calling BC6HComputer::UnpackOne with isSigned = true for each block.
// Both pointers must be non-null (checked with assert).
void DecodeBC6HS(PixelBlockF16 *pBlocks, const uint8_t *pBC)
{
    assert(pBlocks);
    assert(pBC);

    const bool isSigned = true;
    const uint8_t *packed = pBC;
    size_t index = 0;
    while (index < cvtt::NumParallelBlocks)
    {
        Internal::BC6HComputer::UnpackOne(pBlocks[index], packed, isSigned);
        packed += 16;
        ++index;
    }
}
// Public entry point for allocating ETC1 scratch data.
// Forwards allocFunc and context to ETCComputer::AllocETC1Data and returns its result.
ETC1CompressionData *AllocETC1Data(allocFunc_t allocFunc, void *context)
{
    ETC1CompressionData *const data = Internal::ETCComputer::AllocETC1Data(allocFunc, context);
    return data;
}
// Public entry point for releasing ETC1 scratch data.
// Forwards compressionData and freeFunc to ETCComputer::ReleaseETC1Data.
void ReleaseETC1Data(ETC1CompressionData *compressionData, freeFunc_t freeFunc)
{
    Internal::ETCComputer::ReleaseETC1Data(compressionData, freeFunc);
}
// Public entry point for allocating ETC2 scratch data.
// Forwards allocFunc, context and options to ETCComputer::AllocETC2Data and returns its result.
ETC2CompressionData *AllocETC2Data(allocFunc_t allocFunc, void *context, const cvtt::Options &options)
{
    ETC2CompressionData *const data = Internal::ETCComputer::AllocETC2Data(allocFunc, context, options);
    return data;
}
// Public entry point for releasing ETC2 scratch data.
// Forwards compressionData and freeFunc to ETCComputer::ReleaseETC2Data.
void ReleaseETC2Data(ETC2CompressionData *compressionData, freeFunc_t freeFunc)
{
    Internal::ETCComputer::ReleaseETC2Data(compressionData, freeFunc);
}
}
}
#endif

View file

@ -0,0 +1,55 @@
#pragma once
#ifndef __CVTT_AGGREGATEDERROR_H__
#define __CVTT_AGGREGATEDERROR_H__
#include "ConvectionKernels_ParallelMath.h"
namespace cvtt
{
namespace Internal
{
// Keeps one unweighted error total per channel (TVectorSize channels) and combines the
// totals into a single float value on request.
template<int TVectorSize>
class AggregatedError
{
public:
    typedef ParallelMath::UInt16 MUInt16;
    typedef ParallelMath::UInt31 MUInt31;
    typedef ParallelMath::Float MFloat;

    // Starts with every channel total at zero.
    AggregatedError()
    {
        int channel = 0;
        while (channel < TVectorSize)
        {
            m_errorUnweighted[channel] = ParallelMath::MakeUInt31(0);
            channel++;
        }
    }

    // Adds channelErrorUnweighted to the running total of channel ch.
    void Add(const MUInt16 &channelErrorUnweighted, int ch)
    {
        m_errorUnweighted[ch] = m_errorUnweighted[ch] + ParallelMath::ToUInt31(channelErrorUnweighted);
    }

    // Combines the per-channel totals.
    // With the Uniform flag set, the totals are summed as integers and converted to float once;
    // channelWeightsSq is not read. Otherwise each total is converted to float and multiplied
    // by the matching entry of channelWeightsSq before being summed.
    MFloat Finalize(uint32_t flags, const float channelWeightsSq[TVectorSize]) const
    {
        const bool uniformWeights = (flags & cvtt::Flags::Uniform) != 0;

        if (!uniformWeights)
        {
            MFloat weightedSum = ParallelMath::ToFloat(m_errorUnweighted[0]) * channelWeightsSq[0];
            for (int channel = 1; channel < TVectorSize; channel++)
                weightedSum = weightedSum + ParallelMath::ToFloat(m_errorUnweighted[channel]) * channelWeightsSq[channel];
            return weightedSum;
        }

        MUInt31 plainSum = m_errorUnweighted[0];
        for (int channel = 1; channel < TVectorSize; channel++)
            plainSum = plainSum + m_errorUnweighted[channel];
        return ParallelMath::ToFloat(plainSum);
    }

private:
    MUInt31 m_errorUnweighted[TVectorSize];
};
}
}
#endif

3485
thirdparty/cvtt/ConvectionKernels_BC67.cpp vendored Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,99 @@
#pragma once
#include "ConvectionKernels_ParallelMath.h"
// Forward declarations for the types that the BC7Computer / BC6HComputer declarations in
// this header mention only by pointer, reference or array parameter. Declaring them here
// keeps the header from depending on ConvectionKernels.h being included first.
namespace cvtt
{
    namespace Tables
    {
        namespace BC7SC
        {
            struct Table;
        }
    }

    namespace Internal
    {
        namespace BC67
        {
            struct WorkInfo;
        }

        template<int TVectorSize>
        class IndexSelectorHDR;
    }

    struct PixelBlockU8;
    struct PixelBlockF16;
    struct BC7EncodingPlan;
}
namespace cvtt
{
namespace Internal
{
// Static-only helper class holding the BC7 packer and unpacker.
// Kernels::EncodeBC7 calls Pack once per batch of ParallelMath::ParallelSize blocks, and
// Kernels::DecodeBC7 calls UnpackOne once per 16-byte packed block.
class BC7Computer
{
public:
    // Packs one batch of input blocks into packedBlocks using the given channel weights,
    // encoding plan and number of refine rounds.
    static void Pack(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds);
    // Unpacks a single packed block into output.
    static void UnpackOne(PixelBlockU8 &output, const uint8_t* packedBlock);

private:
    static const int MaxTweakRounds = 4;

    // Short aliases for the ParallelMath vector types used in the signatures below.
    typedef ParallelMath::SInt16 MSInt16;
    typedef ParallelMath::UInt15 MUInt15;
    typedef ParallelMath::UInt16 MUInt16;
    typedef ParallelMath::SInt32 MSInt32;
    typedef ParallelMath::Float MFloat;

    // NOTE(review): the helpers below are only declared here; their exact behavior is
    // defined in the implementation file and should be confirmed there.
    static void TweakAlpha(const MUInt15 original[2], int tweak, int range, MUInt15 result[2]);
    static void Quantize(MUInt15* color, int bits, int channels);
    static void QuantizeP(MUInt15* color, int bits, uint16_t p, int channels);
    static void Unquantize(MUInt15* color, int bits, int channels);

    // One endpoint-compression routine per BC7 mode number (0-7), judging by the names.
    static void CompressEndpoints0(MUInt15 ep[2][4], uint16_t p[2]);
    static void CompressEndpoints1(MUInt15 ep[2][4], uint16_t p);
    static void CompressEndpoints2(MUInt15 ep[2][4]);
    static void CompressEndpoints3(MUInt15 ep[2][4], uint16_t p[2]);
    static void CompressEndpoints4(MUInt15 epRGB[2][3], MUInt15 epA[2]);
    static void CompressEndpoints5(MUInt15 epRGB[2][3], MUInt15 epA[2]);
    static void CompressEndpoints6(MUInt15 ep[2][4], uint16_t p[2]);
    static void CompressEndpoints7(MUInt15 ep[2][4], uint16_t p[2]);

    static void TrySingleColorRGBAMultiTable(uint32_t flags, const MUInt15 pixels[16][4], const MFloat average[4], int numRealChannels, const uint8_t *fragmentStart, int shapeLength, const MFloat &staticAlphaError, const ParallelMath::Int16CompFlag punchThroughInvalid[4], MFloat& shapeBestError, MUInt15 shapeBestEP[2][4], MUInt15 *fragmentBestIndexes, const float *channelWeightsSq, const cvtt::Tables::BC7SC::Table*const* tables, int numTables, const ParallelMath::RoundTowardNearestForScope *rtn);

    // Both search routines take the same inputs as Pack plus a BC67::WorkInfo accumulator.
    static void TrySinglePlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn);
    static void TryDualPlane(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const float channelWeights[4], const BC7EncodingPlan &encodingPlan, int numRefineRounds, BC67::WorkInfo& work, const ParallelMath::RoundTowardNearestForScope *rtn);

    template<class T>
    static void Swap(T& a, T& b);
};
// Static-only helper class holding the BC6H packer and unpacker.
// Kernels::EncodeBC6HU / EncodeBC6HS call Pack once per batch of ParallelMath::ParallelSize
// blocks (isSigned = false / true), and Kernels::DecodeBC6HU / DecodeBC6HS call UnpackOne
// once per 16-byte packed block.
class BC6HComputer
{
public:
    // Packs one batch of half-float input blocks into packedBlocks.
    // The Kernels wrappers pass Options::seedPoints as numTweakRounds and
    // Options::refineRoundsBC6H as numRefineRounds.
    static void Pack(uint32_t flags, const PixelBlockF16* inputs, uint8_t* packedBlocks, const float channelWeights[4], bool isSigned, int numTweakRounds, int numRefineRounds);
    // Unpacks a single packed block into output.
    static void UnpackOne(PixelBlockF16 &output, const uint8_t *pBC, bool isSigned);

private:
    // Short aliases for the ParallelMath vector types used in the signatures below.
    typedef ParallelMath::Float MFloat;
    typedef ParallelMath::SInt16 MSInt16;
    typedef ParallelMath::UInt16 MUInt16;
    typedef ParallelMath::UInt15 MUInt15;
    typedef ParallelMath::AInt16 MAInt16;
    typedef ParallelMath::SInt32 MSInt32;
    typedef ParallelMath::UInt31 MUInt31;

    // Presumably upper bounds for the round counts accepted by Pack (Options documents
    // refineRoundsBC6H as "max 3") - confirm against the implementation.
    static const int MaxTweakRounds = 4;
    static const int MaxRefineRounds = 3;

    // NOTE(review): the helpers below are only declared here; their exact behavior is
    // defined in the implementation file and should be confirmed there.
    static MSInt16 QuantizeSingleEndpointElementSigned(const MSInt16 &elem2CL, int precision, const ParallelMath::RoundUpForScope* ru);
    static MUInt15 QuantizeSingleEndpointElementUnsigned(const MUInt15 &elem, int precision, const ParallelMath::RoundUpForScope* ru);
    static void UnquantizeSingleEndpointElementSigned(const MSInt16 &comp, int precision, MSInt16 &outUnquantized, MSInt16 &outUnquantizedFinished2CL);
    static void UnquantizeSingleEndpointElementUnsigned(const MUInt15 &comp, int precision, MUInt16 &outUnquantized, MUInt16 &outUnquantizedFinished);

    // Signed and unsigned variants share the same parameter list.
    static void QuantizeEndpointsSigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn);
    static void QuantizeEndpointsUnsigned(const MSInt16 endPoints[2][3], const MFloat floatPixelsColorSpace[16][3], const MFloat floatPixelsLinearWeighted[16][3], MAInt16 quantizedEndPoints[2][3], MUInt15 indexes[16], IndexSelectorHDR<3> &indexSelector, int fixupIndex, int precision, int indexRange, const float *channelWeights, bool fastIndexing, const ParallelMath::RoundTowardNearestForScope *rtn);

    // Both report their verdict through outIsLegal and the encoded endpoints through outEncodedEPs.
    static void EvaluatePartitionedLegality(const MAInt16 ep0[2][3], const MAInt16 ep1[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][2][3], ParallelMath::Int16CompFlag& outIsLegal);
    static void EvaluateSingleLegality(const MAInt16 ep[2][3], int aPrec, const int bPrec[3], bool isTransformed, MAInt16 outEncodedEPs[2][3], ParallelMath::Int16CompFlag& outIsLegal);

    static void SignExtendSingle(int &v, int bits);
};
}
}

View file

@ -0,0 +1,881 @@
/*
Convection Texture Tools
Copyright (c) 2018-2019 Eric Lasota
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject
to the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------------------
Portions based on DirectX Texture Library (DirectXTex)
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
http://go.microsoft.com/fwlink/?LinkId=248926
*/
#include "ConvectionKernels_Config.h"
#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)
#include "ConvectionKernels_BC6H_IO.h"
namespace cvtt
{
namespace BC6H_IO
{
// Worker for WriteMode0: same bit layout, but every field arrives as uint32_t so that
// shifts such as (bw << 25) and (by << 29) are evaluated in unsigned 32-bit arithmetic.
// With uint16_t operands the shift is performed on a promoted signed int and can overflow it.
static void WriteMode0Widened(uint32_t *encoded, uint32_t m, uint32_t d, uint32_t rw, uint32_t rx, uint32_t ry, uint32_t rz, uint32_t gw, uint32_t gx, uint32_t gy, uint32_t gz, uint32_t bw, uint32_t bx, uint32_t by, uint32_t bz)
{
    encoded[0] = (m & 0x3u) | ((gy >> 2) & 0x4u) | ((by >> 1) & 0x8u) | (bz & 0x10u) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 0x1ff8000u) | ((bw << 25) & 0xfe000000u);
    encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0xf8u) | ((gz << 4) & 0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x3e000u) | ((bz << 18) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0xf800000u) | ((bz << 27) & 0x10000000u) | ((by << 29) & 0xe0000000u);
    encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x3eu) | ((bz << 4) & 0x40u) | ((rz << 7) & 0xf80u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u);
}

// Packs the given fields into encoded[0..2] using the bit layout of this mode.
// Only the first three words of encoded are written. Bits of a field that fall outside
// the masks used by the layout are discarded.
// NOTE(review): m/d and the r/g/b w..z fields presumably carry the BC6H mode bits,
// partition index and endpoint components - confirm against the caller.
void WriteMode0(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
{
    WriteMode0Widened(encoded, m, d, rw, rx, ry, rz, gw, gx, gy, gz, bw, bx, by, bz);
}
// Worker for WriteMode1: same bit layout, but every field arrives as uint32_t so that
// shifts such as (bw << 25) and (by << 29) are evaluated in unsigned 32-bit arithmetic.
// With uint16_t operands the shift is performed on a promoted signed int and can overflow it.
static void WriteMode1Widened(uint32_t *encoded, uint32_t m, uint32_t d, uint32_t rw, uint32_t rx, uint32_t ry, uint32_t rz, uint32_t gw, uint32_t gx, uint32_t gy, uint32_t gz, uint32_t bw, uint32_t bx, uint32_t by, uint32_t bz)
{
    encoded[0] = (m & 0x3u) | ((gy >> 3) & 0x4u) | ((gz >> 1) & 0x18u) | ((rw << 5) & 0xfe0u) | ((bz << 12) & 0x3000u) | ((by << 10) & 0x4000u) | ((gw << 15) & 0x3f8000u) | ((by << 17) & 0x400000u) | ((bz << 21) & 0x800000u) | ((gy << 20) & 0x1000000u) | ((bw << 25) & 0xfe000000u);
    encoded[1] = ((bz >> 3) & 0x1u) | ((bz >> 4) & 0x2u) | ((bz >> 2) & 0x4u) | ((rx << 3) & 0x1f8u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x7e000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0x1f800000u) | ((by << 29) & 0xe0000000u);
    encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x7eu) | ((rz << 7) & 0x1f80u) | ((d << 13) & 0x3e000u);
}

// Packs the given fields into encoded[0..2] using the bit layout of this mode.
// Only the first three words of encoded are written. Bits of a field that fall outside
// the masks used by the layout are discarded.
// NOTE(review): m/d and the r/g/b w..z fields presumably carry the BC6H mode bits,
// partition index and endpoint components - confirm against the caller.
void WriteMode1(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
{
    WriteMode1Widened(encoded, m, d, rw, rx, ry, rz, gw, gx, gy, gz, bw, bx, by, bz);
}
// Worker for WriteMode2: same bit layout, but every field arrives as uint32_t so that
// shifts such as (bw << 25) and (by << 29) are evaluated in unsigned 32-bit arithmetic.
// With uint16_t operands the shift is performed on a promoted signed int and can overflow it.
static void WriteMode2Widened(uint32_t *encoded, uint32_t m, uint32_t d, uint32_t rw, uint32_t rx, uint32_t ry, uint32_t rz, uint32_t gw, uint32_t gx, uint32_t gy, uint32_t gz, uint32_t bw, uint32_t bx, uint32_t by, uint32_t bz)
{
    encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 0x1ff8000u) | ((bw << 25) & 0xfe000000u);
    encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0xf8u) | ((rw >> 2) & 0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x1e000u) | ((gw << 7) & 0x20000u) | ((bz << 18) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0x7800000u) | ((bw << 17) & 0x8000000u) | ((bz << 27) & 0x10000000u) | ((by << 29) & 0xe0000000u);
    encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x3eu) | ((bz << 4) & 0x40u) | ((rz << 7) & 0xf80u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u);
}

// Packs the given fields into encoded[0..2] using the bit layout of this mode.
// Only the first three words of encoded are written. Bits of a field that fall outside
// the masks used by the layout are discarded.
// NOTE(review): m/d and the r/g/b w..z fields presumably carry the BC6H mode bits,
// partition index and endpoint components - confirm against the caller.
void WriteMode2(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
{
    WriteMode2Widened(encoded, m, d, rw, rx, ry, rz, gw, gx, gy, gz, bw, bx, by, bz);
}
// Worker for WriteMode3: same bit layout, but every field arrives as uint32_t so that
// shifts such as (bw << 25) and (by << 29) are evaluated in unsigned 32-bit arithmetic.
// With uint16_t operands the shift is performed on a promoted signed int and can overflow it.
static void WriteMode3Widened(uint32_t *encoded, uint32_t m, uint32_t d, uint32_t rw, uint32_t rx, uint32_t ry, uint32_t rz, uint32_t gw, uint32_t gx, uint32_t gy, uint32_t gz, uint32_t bw, uint32_t bx, uint32_t by, uint32_t bz)
{
    encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 0x1ff8000u) | ((bw << 25) & 0xfe000000u);
    encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0x78u) | ((rw >> 3) & 0x80u) | ((gz << 4) & 0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x3e000u) | ((gw << 8) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0x7800000u) | ((bw << 17) & 0x8000000u) | ((bz << 27) & 0x10000000u) | ((by << 29) & 0xe0000000u);
    encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x1eu) | ((bz << 5) & 0x20u) | ((bz << 4) & 0x40u) | ((rz << 7) & 0x780u) | ((gy << 7) & 0x800u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u);
}

// Packs the given fields into encoded[0..2] using the bit layout of this mode.
// Only the first three words of encoded are written. Bits of a field that fall outside
// the masks used by the layout are discarded.
// NOTE(review): m/d and the r/g/b w..z fields presumably carry the BC6H mode bits,
// partition index and endpoint components - confirm against the caller.
void WriteMode3(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
{
    WriteMode3Widened(encoded, m, d, rw, rx, ry, rz, gw, gx, gy, gz, bw, bx, by, bz);
}
// Worker for WriteMode4: same bit layout, but every field arrives as uint32_t so that
// shifts such as (bw << 25) and (by << 29) are evaluated in unsigned 32-bit arithmetic.
// With uint16_t operands the shift is performed on a promoted signed int and can overflow it.
static void WriteMode4Widened(uint32_t *encoded, uint32_t m, uint32_t d, uint32_t rw, uint32_t rx, uint32_t ry, uint32_t rz, uint32_t gw, uint32_t gx, uint32_t gy, uint32_t gz, uint32_t bw, uint32_t bx, uint32_t by, uint32_t bz)
{
    encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x7fe0u) | ((gw << 15) & 0x1ff8000u) | ((bw << 25) & 0xfe000000u);
    encoded[1] = ((bw >> 7) & 0x7u) | ((rx << 3) & 0x78u) | ((rw >> 3) & 0x80u) | ((by << 4) & 0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x1e000u) | ((gw << 7) & 0x20000u) | ((bz << 18) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0xf800000u) | ((bw << 18) & 0x10000000u) | ((by << 29) & 0xe0000000u);
    encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x1eu) | ((bz << 4) & 0x60u) | ((rz << 7) & 0x780u) | ((bz << 7) & 0x800u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u);
}

// Packs the given fields into encoded[0..2] using the bit layout of this mode.
// Only the first three words of encoded are written. Bits of a field that fall outside
// the masks used by the layout are discarded.
// NOTE(review): m/d and the r/g/b w..z fields presumably carry the BC6H mode bits,
// partition index and endpoint components - confirm against the caller.
void WriteMode4(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
{
    WriteMode4Widened(encoded, m, d, rw, rx, ry, rz, gw, gx, gy, gz, bw, bx, by, bz);
}
// Worker for WriteMode5: same bit layout, but every field arrives as uint32_t so that
// shifts such as (bw << 25) and (by << 29) are evaluated in unsigned 32-bit arithmetic.
// With uint16_t operands the shift is performed on a promoted signed int and can overflow it.
static void WriteMode5Widened(uint32_t *encoded, uint32_t m, uint32_t d, uint32_t rw, uint32_t rx, uint32_t ry, uint32_t rz, uint32_t gw, uint32_t gx, uint32_t gy, uint32_t gz, uint32_t bw, uint32_t bx, uint32_t by, uint32_t bz)
{
    encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x3fe0u) | ((by << 10) & 0x4000u) | ((gw << 15) & 0xff8000u) | ((gy << 20) & 0x1000000u) | ((bw << 25) & 0xfe000000u);
    encoded[1] = ((bw >> 7) & 0x3u) | ((bz >> 2) & 0x4u) | ((rx << 3) & 0xf8u) | ((gz << 4) & 0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x3e000u) | ((bz << 18) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0xf800000u) | ((bz << 27) & 0x10000000u) | ((by << 29) & 0xe0000000u);
    encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x3eu) | ((bz << 4) & 0x40u) | ((rz << 7) & 0xf80u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u);
}

// Packs the given fields into encoded[0..2] using the bit layout of this mode.
// Only the first three words of encoded are written. Bits of a field that fall outside
// the masks used by the layout are discarded.
// NOTE(review): m/d and the r/g/b w..z fields presumably carry the BC6H mode bits,
// partition index and endpoint components - confirm against the caller.
void WriteMode5(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
{
    WriteMode5Widened(encoded, m, d, rw, rx, ry, rz, gw, gx, gy, gz, bw, bx, by, bz);
}
// Worker for WriteMode6: same bit layout, but every field arrives as uint32_t so that
// shifts such as (bw << 25) and (by << 29) are evaluated in unsigned 32-bit arithmetic.
// With uint16_t operands the shift is performed on a promoted signed int and can overflow it.
static void WriteMode6Widened(uint32_t *encoded, uint32_t m, uint32_t d, uint32_t rw, uint32_t rx, uint32_t ry, uint32_t rz, uint32_t gw, uint32_t gx, uint32_t gy, uint32_t gz, uint32_t bw, uint32_t bx, uint32_t by, uint32_t bz)
{
    encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x1fe0u) | ((gz << 9) & 0x2000u) | ((by << 10) & 0x4000u) | ((gw << 15) & 0x7f8000u) | ((bz << 21) & 0x800000u) | ((gy << 20) & 0x1000000u) | ((bw << 25) & 0xfe000000u);
    encoded[1] = ((bw >> 7) & 0x1u) | ((bz >> 2) & 0x6u) | ((rx << 3) & 0x1f8u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x3e000u) | ((bz << 18) & 0x40000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0xf800000u) | ((bz << 27) & 0x10000000u) | ((by << 29) & 0xe0000000u);
    encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x7eu) | ((rz << 7) & 0x1f80u) | ((d << 13) & 0x3e000u);
}

// Packs the given fields into encoded[0..2] using the bit layout of this mode.
// Only the first three words of encoded are written. Bits of a field that fall outside
// the masks used by the layout are discarded.
// NOTE(review): m/d and the r/g/b w..z fields presumably carry the BC6H mode bits,
// partition index and endpoint components - confirm against the caller.
void WriteMode6(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
{
    WriteMode6Widened(encoded, m, d, rw, rx, ry, rz, gw, gx, gy, gz, bw, bx, by, bz);
}
// Worker for WriteMode7: same bit layout, but every field arrives as uint32_t so that
// shifts such as (bw << 25) and (by << 29) are evaluated in unsigned 32-bit arithmetic.
// With uint16_t operands the shift is performed on a promoted signed int and can overflow it.
static void WriteMode7Widened(uint32_t *encoded, uint32_t m, uint32_t d, uint32_t rw, uint32_t rx, uint32_t ry, uint32_t rz, uint32_t gw, uint32_t gx, uint32_t gy, uint32_t gz, uint32_t bw, uint32_t bx, uint32_t by, uint32_t bz)
{
    encoded[0] = (m & 0x1fu) | ((rw << 5) & 0x1fe0u) | ((bz << 13) & 0x2000u) | ((by << 10) & 0x4000u) | ((gw << 15) & 0x7f8000u) | ((gy << 18) & 0x800000u) | ((gy << 20) & 0x1000000u) | ((bw << 25) & 0xfe000000u);
    encoded[1] = ((bw >> 7) & 0x1u) | ((gz >> 4) & 0x2u) | ((bz >> 2) & 0x4u) | ((rx << 3) & 0xf8u) | ((gz << 4) & 0x100u) | ((gy << 9) & 0x1e00u) | ((gx << 13) & 0x7e000u) | ((gz << 19) & 0x780000u) | ((bx << 23) & 0xf800000u) | ((bz << 27) & 0x10000000u) | ((by << 29) & 0xe0000000u);
    encoded[2] = ((by >> 3) & 0x1u) | ((ry << 1) & 0x3eu) | ((bz << 4) & 0x40u) | ((rz << 7) & 0xf80u) | ((bz << 9) & 0x1000u) | ((d << 13) & 0x3e000u);
}

// Packs the given fields into encoded[0..2] using the bit layout of this mode.
// Only the first three words of encoded are written. Bits of a field that fall outside
// the masks used by the layout are discarded.
// NOTE(review): m/d and the r/g/b w..z fields presumably carry the BC6H mode bits,
// partition index and endpoint components - confirm against the caller.
void WriteMode7(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
{
    WriteMode7Widened(encoded, m, d, rw, rx, ry, rz, gw, gx, gy, gz, bw, bx, by, bz);
}
void WriteMode8(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
{
    // Packs the mode value `m` and the 13 fields into encoded[0..2] using the
    // mode-8 bit layout. Only those three words are written; each one is
    // overwritten completely.
    //
    // Fields are widened to uint32_t first: shifting a promoted (signed int)
    // uint16_t so that set bits leave bit 31 (e.g. `by << 29`, `bw << 25`) is
    // undefined behaviour before C++20, whereas the unsigned shift is defined.
    const uint32_t m32 = m, d32 = d;
    const uint32_t rw32 = rw, rx32 = rx, ry32 = ry, rz32 = rz;
    const uint32_t gw32 = gw, gx32 = gx, gy32 = gy, gz32 = gz;
    const uint32_t bw32 = bw, bx32 = bx, by32 = by, bz32 = bz;

    encoded[0] = (m32 & 0x1fu)
        | ((rw32 << 5) & 0x1fe0u)
        | ((bz32 << 12) & 0x2000u)
        | ((by32 << 10) & 0x4000u)
        | ((gw32 << 15) & 0x7f8000u)
        | ((by32 << 18) & 0x800000u)
        | ((gy32 << 20) & 0x1000000u)
        | ((bw32 << 25) & 0xfe000000u);

    encoded[1] = ((bw32 >> 7) & 0x1u)
        | ((bz32 >> 4) & 0x2u)
        | ((bz32 >> 2) & 0x4u)
        | ((rx32 << 3) & 0xf8u)
        | ((gz32 << 4) & 0x100u)
        | ((gy32 << 9) & 0x1e00u)
        | ((gx32 << 13) & 0x3e000u)
        | ((bz32 << 18) & 0x40000u)
        | ((gz32 << 19) & 0x780000u)
        | ((bx32 << 23) & 0x1f800000u)
        | ((by32 << 29) & 0xe0000000u);

    encoded[2] = ((by32 >> 3) & 0x1u)
        | ((ry32 << 1) & 0x3eu)
        | ((bz32 << 4) & 0x40u)
        | ((rz32 << 7) & 0xf80u)
        | ((bz32 << 9) & 0x1000u)
        | ((d32 << 13) & 0x3e000u);
}
void WriteMode9(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
{
    // Packs the mode value `m` and the 13 fields into encoded[0..2] using the
    // mode-9 bit layout. Only those three words are written; each one is
    // overwritten completely.
    //
    // Fields are widened to uint32_t first: shifting a promoted (signed int)
    // uint16_t so that set bits leave bit 31 (e.g. `by << 29`, `gz << 26`) is
    // undefined behaviour before C++20, whereas the unsigned shift is defined.
    const uint32_t m32 = m, d32 = d;
    const uint32_t rw32 = rw, rx32 = rx, ry32 = ry, rz32 = rz;
    const uint32_t gw32 = gw, gx32 = gx, gy32 = gy, gz32 = gz;
    const uint32_t bw32 = bw, bx32 = bx, by32 = by, bz32 = bz;

    encoded[0] = (m32 & 0x1fu)
        | ((rw32 << 5) & 0x7e0u)
        | ((gz32 << 7) & 0x800u)
        | ((bz32 << 12) & 0x3000u)
        | ((by32 << 10) & 0x4000u)
        | ((gw32 << 15) & 0x1f8000u)
        | ((gy32 << 16) & 0x200000u)
        | ((by32 << 17) & 0x400000u)
        | ((bz32 << 21) & 0x800000u)
        | ((gy32 << 20) & 0x1000000u)
        | ((bw32 << 25) & 0x7e000000u)
        | ((gz32 << 26) & 0x80000000u);

    encoded[1] = ((bz32 >> 3) & 0x1u)
        | ((bz32 >> 4) & 0x2u)
        | ((bz32 >> 2) & 0x4u)
        | ((rx32 << 3) & 0x1f8u)
        | ((gy32 << 9) & 0x1e00u)
        | ((gx32 << 13) & 0x7e000u)
        | ((gz32 << 19) & 0x780000u)
        | ((bx32 << 23) & 0x1f800000u)
        | ((by32 << 29) & 0xe0000000u);

    encoded[2] = ((by32 >> 3) & 0x1u)
        | ((ry32 << 1) & 0x7eu)
        | ((rz32 << 7) & 0x1f80u)
        | ((d32 << 13) & 0x3e000u);
}
void WriteMode10(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
{
    // Packs the mode value `m` and the rw/gw/bw/rx/gx/bx fields into
    // encoded[0..2] using the mode-10 bit layout. The d, ry, rz, gy, gz, by and
    // bz parameters are not stored by this mode; they exist only so the
    // function matches the common writer signature used by g_writeFuncs.
    //
    // Fields are widened to uint32_t first: shifting a promoted (signed int)
    // uint16_t so that set bits leave bit 31 (e.g. `bw << 25`, `bx << 23`) is
    // undefined behaviour before C++20, whereas the unsigned shift is defined.
    const uint32_t m32 = m;
    const uint32_t rw32 = rw, rx32 = rx;
    const uint32_t gw32 = gw, gx32 = gx;
    const uint32_t bw32 = bw, bx32 = bx;

    encoded[0] = (m32 & 0x1fu)
        | ((rw32 << 5) & 0x7fe0u)
        | ((gw32 << 15) & 0x1ff8000u)
        | ((bw32 << 25) & 0xfe000000u);

    encoded[1] = ((bw32 >> 7) & 0x7u)
        | ((rx32 << 3) & 0x1ff8u)
        | ((gx32 << 13) & 0x7fe000u)
        | ((bx32 << 23) & 0xff800000u);

    // Only the top bit of the 10-bit bx spills into the third word.
    encoded[2] = ((bx32 >> 9) & 0x1u);
}
void WriteMode11(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
{
    // Packs the mode value `m` and the rw/gw/bw/rx/gx/bx fields into
    // encoded[0..2] using the mode-11 bit layout. The d, ry, rz, gy, gz, by and
    // bz parameters are not stored by this mode; they exist only so the
    // function matches the common writer signature used by g_writeFuncs.
    //
    // Fields are widened to uint32_t first: shifting a promoted (signed int)
    // uint16_t so that set bits leave bit 31 (e.g. `bw << 25`, `bx << 23`) is
    // undefined behaviour before C++20, whereas the unsigned shift is defined.
    const uint32_t m32 = m;
    const uint32_t rw32 = rw, rx32 = rx;
    const uint32_t gw32 = gw, gx32 = gx;
    const uint32_t bw32 = bw, bx32 = bx;

    encoded[0] = (m32 & 0x1fu)
        | ((rw32 << 5) & 0x7fe0u)
        | ((gw32 << 15) & 0x1ff8000u)
        | ((bw32 << 25) & 0xfe000000u);

    // Bit 10 of rw and gw is stored directly after the corresponding 9-bit
    // rx / gx field.
    encoded[1] = ((bw32 >> 7) & 0x7u)
        | ((rx32 << 3) & 0xff8u)
        | ((rw32 << 2) & 0x1000u)
        | ((gx32 << 13) & 0x3fe000u)
        | ((gw32 << 12) & 0x400000u)
        | ((bx32 << 23) & 0xff800000u);

    // Bit 10 of bw is the only payload in the third word.
    encoded[2] = ((bw32 >> 10) & 0x1u);
}
void WriteMode12(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
{
    // Packs the mode value `m` and the rw/gw/bw/rx/gx/bx fields into
    // encoded[0..2] using the mode-12 bit layout. The d, ry, rz, gy, gz, by and
    // bz parameters are not stored by this mode; they exist only so the
    // function matches the common writer signature used by g_writeFuncs.
    //
    // Fields are widened to uint32_t first: shifting a promoted (signed int)
    // uint16_t so that set bits leave bit 31 (e.g. `bw << 25`, `bx << 23`) is
    // undefined behaviour before C++20, whereas the unsigned shift is defined.
    const uint32_t m32 = m;
    const uint32_t rw32 = rw, rx32 = rx;
    const uint32_t gw32 = gw, gx32 = gx;
    const uint32_t bw32 = bw, bx32 = bx;

    encoded[0] = (m32 & 0x1fu)
        | ((rw32 << 5) & 0x7fe0u)
        | ((gw32 << 15) & 0x1ff8000u)
        | ((bw32 << 25) & 0xfe000000u);

    // Bits 10 and 11 of rw / gw / bw follow the matching 8-bit delta field,
    // stored in reversed order (bit 11 first, then bit 10).
    encoded[1] = ((bw32 >> 7) & 0x7u)
        | ((rx32 << 3) & 0x7f8u)
        | (rw32 & 0x800u)
        | ((rw32 << 2) & 0x1000u)
        | ((gx32 << 13) & 0x1fe000u)
        | ((gw32 << 10) & 0x200000u)
        | ((gw32 << 12) & 0x400000u)
        | ((bx32 << 23) & 0x7f800000u)
        | ((bw32 << 20) & 0x80000000u);

    // Bit 10 of bw is the only payload in the third word.
    encoded[2] = ((bw32 >> 10) & 0x1u);
}
void WriteMode13(uint32_t *encoded, uint16_t m, uint16_t d, uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz, uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz, uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz)
{
    // Packs the mode value `m` and the rw/gw/bw/rx/gx/bx fields into
    // encoded[0..2] using the mode-13 bit layout. The d, ry, rz, gy, gz, by and
    // bz parameters are not stored by this mode; they exist only so the
    // function matches the common writer signature used by g_writeFuncs.
    //
    // Fields are widened to uint32_t first: shifting a promoted (signed int)
    // uint16_t so that set bits leave bit 31 (e.g. `bw << 25` with a 16-bit
    // bw) is undefined behaviour before C++20, whereas the unsigned shift is
    // defined.
    const uint32_t m32 = m;
    const uint32_t rw32 = rw, rx32 = rx;
    const uint32_t gw32 = gw, gx32 = gx;
    const uint32_t bw32 = bw, bx32 = bx;

    encoded[0] = (m32 & 0x1fu)
        | ((rw32 << 5) & 0x7fe0u)
        | ((gw32 << 15) & 0x1ff8000u)
        | ((bw32 << 25) & 0xfe000000u);

    // After each 4-bit delta field, bits 15..10 of the matching base value are
    // stored one bit at a time in reversed order (bit 15 first, bit 10 last).
    encoded[1] = ((bw32 >> 7) & 0x7u)
        | ((rx32 << 3) & 0x78u)
        | ((rw32 >> 8) & 0x80u)
        | ((rw32 >> 6) & 0x100u)
        | ((rw32 >> 4) & 0x200u)
        | ((rw32 >> 2) & 0x400u)
        | (rw32 & 0x800u)
        | ((rw32 << 2) & 0x1000u)
        | ((gx32 << 13) & 0x1e000u)
        | ((gw32 << 2) & 0x20000u)
        | ((gw32 << 4) & 0x40000u)
        | ((gw32 << 6) & 0x80000u)
        | ((gw32 << 8) & 0x100000u)
        | ((gw32 << 10) & 0x200000u)
        | ((gw32 << 12) & 0x400000u)
        | ((bx32 << 23) & 0x7800000u)
        | ((bw32 << 12) & 0x8000000u)
        | ((bw32 << 14) & 0x10000000u)
        | ((bw32 << 16) & 0x20000000u)
        | ((bw32 << 18) & 0x40000000u)
        | ((bw32 << 20) & 0x80000000u);

    // Bit 10 of bw is the only payload in the third word.
    encoded[2] = ((bw32 >> 10) & 0x1u);
}
void ReadMode0(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
{
    // Unpacks the 13 fields of the mode-0 layout from encoded[0..2].
    // Fields that are scattered over several bit ranges are reassembled by
    // OR-ing the individually shifted and masked pieces.
    const uint32_t w0 = encoded[0];
    const uint32_t w1 = encoded[1];
    const uint32_t w2 = encoded[2];

    const uint16_t dVal = static_cast<uint16_t>((w2 >> 13) & 0x1fu);
    const uint16_t rwVal = static_cast<uint16_t>((w0 >> 5) & 0x3ffu);
    const uint16_t rxVal = static_cast<uint16_t>((w1 >> 3) & 0x1fu);
    const uint16_t ryVal = static_cast<uint16_t>((w2 >> 1) & 0x1fu);
    const uint16_t rzVal = static_cast<uint16_t>((w2 >> 7) & 0x1fu);
    const uint16_t gwVal = static_cast<uint16_t>((w0 >> 15) & 0x3ffu);
    const uint16_t gxVal = static_cast<uint16_t>((w1 >> 13) & 0x1fu);
    const uint16_t gyVal = static_cast<uint16_t>(((w0 << 2) & 0x10u) | ((w1 >> 9) & 0xfu));
    const uint16_t gzVal = static_cast<uint16_t>(((w1 >> 4) & 0x10u) | ((w1 >> 19) & 0xfu));
    const uint16_t bwVal = static_cast<uint16_t>(((w0 >> 25) & 0x7fu) | ((w1 << 7) & 0x380u));
    const uint16_t bxVal = static_cast<uint16_t>((w1 >> 23) & 0x1fu);
    const uint16_t byVal = static_cast<uint16_t>(((w0 << 1) & 0x10u) | ((w1 >> 29) & 0x7u) | ((w2 << 3) & 0x8u));
    const uint16_t bzVal = static_cast<uint16_t>((w0 & 0x10u) | ((w1 >> 18) & 0x1u) | ((w1 >> 27) & 0x2u) | ((w2 >> 4) & 0x4u) | ((w2 >> 9) & 0x8u));

    // Outputs are written only after every field has been decoded.
    outD = dVal;
    outRW = rwVal;
    outRX = rxVal;
    outRY = ryVal;
    outRZ = rzVal;
    outGW = gwVal;
    outGX = gxVal;
    outGY = gyVal;
    outGZ = gzVal;
    outBW = bwVal;
    outBX = bxVal;
    outBY = byVal;
    outBZ = bzVal;
}
void ReadMode1(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
{
    // Unpacks the 13 fields of the mode-1 layout from encoded[0..2].
    // Fields that are scattered over several bit ranges are reassembled by
    // OR-ing the individually shifted and masked pieces.
    const uint32_t w0 = encoded[0];
    const uint32_t w1 = encoded[1];
    const uint32_t w2 = encoded[2];

    const uint16_t dVal = static_cast<uint16_t>((w2 >> 13) & 0x1fu);
    const uint16_t rwVal = static_cast<uint16_t>((w0 >> 5) & 0x7fu);
    const uint16_t rxVal = static_cast<uint16_t>((w1 >> 3) & 0x3fu);
    const uint16_t ryVal = static_cast<uint16_t>((w2 >> 1) & 0x3fu);
    const uint16_t rzVal = static_cast<uint16_t>((w2 >> 7) & 0x3fu);
    const uint16_t gwVal = static_cast<uint16_t>((w0 >> 15) & 0x7fu);
    const uint16_t gxVal = static_cast<uint16_t>((w1 >> 13) & 0x3fu);
    const uint16_t gyVal = static_cast<uint16_t>(((w0 << 3) & 0x20u) | ((w0 >> 20) & 0x10u) | ((w1 >> 9) & 0xfu));
    const uint16_t gzVal = static_cast<uint16_t>(((w0 << 1) & 0x30u) | ((w1 >> 19) & 0xfu));
    const uint16_t bwVal = static_cast<uint16_t>((w0 >> 25) & 0x7fu);
    const uint16_t bxVal = static_cast<uint16_t>((w1 >> 23) & 0x3fu);
    const uint16_t byVal = static_cast<uint16_t>(((w0 >> 10) & 0x10u) | ((w0 >> 17) & 0x20u) | ((w1 >> 29) & 0x7u) | ((w2 << 3) & 0x8u));
    const uint16_t bzVal = static_cast<uint16_t>(((w0 >> 12) & 0x3u) | ((w0 >> 21) & 0x4u) | ((w1 << 3) & 0x8u) | ((w1 << 4) & 0x20u) | ((w1 << 2) & 0x10u));

    // Outputs are written only after every field has been decoded.
    outD = dVal;
    outRW = rwVal;
    outRX = rxVal;
    outRY = ryVal;
    outRZ = rzVal;
    outGW = gwVal;
    outGX = gxVal;
    outGY = gyVal;
    outGZ = gzVal;
    outBW = bwVal;
    outBX = bxVal;
    outBY = byVal;
    outBZ = bzVal;
}
void ReadMode2(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
{
    // Unpacks the 13 fields of the mode-2 layout from encoded[0..2].
    // Fields that are scattered over several bit ranges are reassembled by
    // OR-ing the individually shifted and masked pieces.
    const uint32_t w0 = encoded[0];
    const uint32_t w1 = encoded[1];
    const uint32_t w2 = encoded[2];

    const uint16_t dVal = static_cast<uint16_t>((w2 >> 13) & 0x1fu);
    const uint16_t rwVal = static_cast<uint16_t>(((w0 >> 5) & 0x3ffu) | ((w1 << 2) & 0x400u));
    const uint16_t rxVal = static_cast<uint16_t>((w1 >> 3) & 0x1fu);
    const uint16_t ryVal = static_cast<uint16_t>((w2 >> 1) & 0x1fu);
    const uint16_t rzVal = static_cast<uint16_t>((w2 >> 7) & 0x1fu);
    const uint16_t gwVal = static_cast<uint16_t>(((w0 >> 15) & 0x3ffu) | ((w1 >> 7) & 0x400u));
    const uint16_t gxVal = static_cast<uint16_t>((w1 >> 13) & 0xfu);
    const uint16_t gyVal = static_cast<uint16_t>((w1 >> 9) & 0xfu);
    const uint16_t gzVal = static_cast<uint16_t>((w1 >> 19) & 0xfu);
    const uint16_t bwVal = static_cast<uint16_t>(((w0 >> 25) & 0x7fu) | ((w1 << 7) & 0x380u) | ((w1 >> 17) & 0x400u));
    const uint16_t bxVal = static_cast<uint16_t>((w1 >> 23) & 0xfu);
    const uint16_t byVal = static_cast<uint16_t>(((w1 >> 29) & 0x7u) | ((w2 << 3) & 0x8u));
    const uint16_t bzVal = static_cast<uint16_t>(((w1 >> 18) & 0x1u) | ((w1 >> 27) & 0x2u) | ((w2 >> 4) & 0x4u) | ((w2 >> 9) & 0x8u));

    // Outputs are written only after every field has been decoded.
    outD = dVal;
    outRW = rwVal;
    outRX = rxVal;
    outRY = ryVal;
    outRZ = rzVal;
    outGW = gwVal;
    outGX = gxVal;
    outGY = gyVal;
    outGZ = gzVal;
    outBW = bwVal;
    outBX = bxVal;
    outBY = byVal;
    outBZ = bzVal;
}
void ReadMode3(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
{
    // Unpacks the 13 fields of the mode-3 layout from encoded[0..2].
    // Fields that are scattered over several bit ranges are reassembled by
    // OR-ing the individually shifted and masked pieces.
    const uint32_t w0 = encoded[0];
    const uint32_t w1 = encoded[1];
    const uint32_t w2 = encoded[2];

    const uint16_t dVal = static_cast<uint16_t>((w2 >> 13) & 0x1fu);
    const uint16_t rwVal = static_cast<uint16_t>(((w0 >> 5) & 0x3ffu) | ((w1 << 3) & 0x400u));
    const uint16_t rxVal = static_cast<uint16_t>((w1 >> 3) & 0xfu);
    const uint16_t ryVal = static_cast<uint16_t>((w2 >> 1) & 0xfu);
    const uint16_t rzVal = static_cast<uint16_t>((w2 >> 7) & 0xfu);
    const uint16_t gwVal = static_cast<uint16_t>(((w0 >> 15) & 0x3ffu) | ((w1 >> 8) & 0x400u));
    const uint16_t gxVal = static_cast<uint16_t>((w1 >> 13) & 0x1fu);
    const uint16_t gyVal = static_cast<uint16_t>(((w1 >> 9) & 0xfu) | ((w2 >> 7) & 0x10u));
    const uint16_t gzVal = static_cast<uint16_t>(((w1 >> 4) & 0x10u) | ((w1 >> 19) & 0xfu));
    const uint16_t bwVal = static_cast<uint16_t>(((w0 >> 25) & 0x7fu) | ((w1 << 7) & 0x380u) | ((w1 >> 17) & 0x400u));
    const uint16_t bxVal = static_cast<uint16_t>((w1 >> 23) & 0xfu);
    const uint16_t byVal = static_cast<uint16_t>(((w1 >> 29) & 0x7u) | ((w2 << 3) & 0x8u));
    const uint16_t bzVal = static_cast<uint16_t>(((w1 >> 27) & 0x2u) | ((w2 >> 5) & 0x1u) | ((w2 >> 4) & 0x4u) | ((w2 >> 9) & 0x8u));

    // Outputs are written only after every field has been decoded.
    outD = dVal;
    outRW = rwVal;
    outRX = rxVal;
    outRY = ryVal;
    outRZ = rzVal;
    outGW = gwVal;
    outGX = gxVal;
    outGY = gyVal;
    outGZ = gzVal;
    outBW = bwVal;
    outBX = bxVal;
    outBY = byVal;
    outBZ = bzVal;
}
void ReadMode4(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
{
    // Unpacks the 13 fields of the mode-4 layout from encoded[0..2].
    // Fields that are scattered over several bit ranges are reassembled by
    // OR-ing the individually shifted and masked pieces.
    const uint32_t w0 = encoded[0];
    const uint32_t w1 = encoded[1];
    const uint32_t w2 = encoded[2];

    const uint16_t dVal = static_cast<uint16_t>((w2 >> 13) & 0x1fu);
    const uint16_t rwVal = static_cast<uint16_t>(((w0 >> 5) & 0x3ffu) | ((w1 << 3) & 0x400u));
    const uint16_t rxVal = static_cast<uint16_t>((w1 >> 3) & 0xfu);
    const uint16_t ryVal = static_cast<uint16_t>((w2 >> 1) & 0xfu);
    const uint16_t rzVal = static_cast<uint16_t>((w2 >> 7) & 0xfu);
    const uint16_t gwVal = static_cast<uint16_t>(((w0 >> 15) & 0x3ffu) | ((w1 >> 7) & 0x400u));
    const uint16_t gxVal = static_cast<uint16_t>((w1 >> 13) & 0xfu);
    const uint16_t gyVal = static_cast<uint16_t>((w1 >> 9) & 0xfu);
    const uint16_t gzVal = static_cast<uint16_t>((w1 >> 19) & 0xfu);
    const uint16_t bwVal = static_cast<uint16_t>(((w0 >> 25) & 0x7fu) | ((w1 << 7) & 0x380u) | ((w1 >> 18) & 0x400u));
    const uint16_t bxVal = static_cast<uint16_t>((w1 >> 23) & 0x1fu);
    const uint16_t byVal = static_cast<uint16_t>(((w1 >> 4) & 0x10u) | ((w1 >> 29) & 0x7u) | ((w2 << 3) & 0x8u));
    const uint16_t bzVal = static_cast<uint16_t>(((w1 >> 18) & 0x1u) | ((w2 >> 4) & 0x6u) | ((w2 >> 7) & 0x10u) | ((w2 >> 9) & 0x8u));

    // Outputs are written only after every field has been decoded.
    outD = dVal;
    outRW = rwVal;
    outRX = rxVal;
    outRY = ryVal;
    outRZ = rzVal;
    outGW = gwVal;
    outGX = gxVal;
    outGY = gyVal;
    outGZ = gzVal;
    outBW = bwVal;
    outBX = bxVal;
    outBY = byVal;
    outBZ = bzVal;
}
void ReadMode5(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
{
    // Unpacks the 13 fields of the mode-5 layout from encoded[0..2].
    // Fields that are scattered over several bit ranges are reassembled by
    // OR-ing the individually shifted and masked pieces.
    const uint32_t w0 = encoded[0];
    const uint32_t w1 = encoded[1];
    const uint32_t w2 = encoded[2];

    const uint16_t dVal = static_cast<uint16_t>((w2 >> 13) & 0x1fu);
    const uint16_t rwVal = static_cast<uint16_t>((w0 >> 5) & 0x1ffu);
    const uint16_t rxVal = static_cast<uint16_t>((w1 >> 3) & 0x1fu);
    const uint16_t ryVal = static_cast<uint16_t>((w2 >> 1) & 0x1fu);
    const uint16_t rzVal = static_cast<uint16_t>((w2 >> 7) & 0x1fu);
    const uint16_t gwVal = static_cast<uint16_t>((w0 >> 15) & 0x1ffu);
    const uint16_t gxVal = static_cast<uint16_t>((w1 >> 13) & 0x1fu);
    const uint16_t gyVal = static_cast<uint16_t>(((w0 >> 20) & 0x10u) | ((w1 >> 9) & 0xfu));
    const uint16_t gzVal = static_cast<uint16_t>(((w1 >> 4) & 0x10u) | ((w1 >> 19) & 0xfu));
    const uint16_t bwVal = static_cast<uint16_t>(((w0 >> 25) & 0x7fu) | ((w1 << 7) & 0x180u));
    const uint16_t bxVal = static_cast<uint16_t>((w1 >> 23) & 0x1fu);
    const uint16_t byVal = static_cast<uint16_t>(((w0 >> 10) & 0x10u) | ((w1 >> 29) & 0x7u) | ((w2 << 3) & 0x8u));
    const uint16_t bzVal = static_cast<uint16_t>(((w1 << 2) & 0x10u) | ((w1 >> 18) & 0x1u) | ((w1 >> 27) & 0x2u) | ((w2 >> 4) & 0x4u) | ((w2 >> 9) & 0x8u));

    // Outputs are written only after every field has been decoded.
    outD = dVal;
    outRW = rwVal;
    outRX = rxVal;
    outRY = ryVal;
    outRZ = rzVal;
    outGW = gwVal;
    outGX = gxVal;
    outGY = gyVal;
    outGZ = gzVal;
    outBW = bwVal;
    outBX = bxVal;
    outBY = byVal;
    outBZ = bzVal;
}
void ReadMode6(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
{
    // Unpacks the 13 fields of the mode-6 layout from encoded[0..2].
    // Fields that are scattered over several bit ranges are reassembled by
    // OR-ing the individually shifted and masked pieces.
    const uint32_t w0 = encoded[0];
    const uint32_t w1 = encoded[1];
    const uint32_t w2 = encoded[2];

    const uint16_t dVal = static_cast<uint16_t>((w2 >> 13) & 0x1fu);
    const uint16_t rwVal = static_cast<uint16_t>((w0 >> 5) & 0xffu);
    const uint16_t rxVal = static_cast<uint16_t>((w1 >> 3) & 0x3fu);
    const uint16_t ryVal = static_cast<uint16_t>((w2 >> 1) & 0x3fu);
    const uint16_t rzVal = static_cast<uint16_t>((w2 >> 7) & 0x3fu);
    const uint16_t gwVal = static_cast<uint16_t>((w0 >> 15) & 0xffu);
    const uint16_t gxVal = static_cast<uint16_t>((w1 >> 13) & 0x1fu);
    const uint16_t gyVal = static_cast<uint16_t>(((w0 >> 20) & 0x10u) | ((w1 >> 9) & 0xfu));
    const uint16_t gzVal = static_cast<uint16_t>(((w0 >> 9) & 0x10u) | ((w1 >> 19) & 0xfu));
    const uint16_t bwVal = static_cast<uint16_t>(((w0 >> 25) & 0x7fu) | ((w1 << 7) & 0x80u));
    const uint16_t bxVal = static_cast<uint16_t>((w1 >> 23) & 0x1fu);
    const uint16_t byVal = static_cast<uint16_t>(((w0 >> 10) & 0x10u) | ((w1 >> 29) & 0x7u) | ((w2 << 3) & 0x8u));
    const uint16_t bzVal = static_cast<uint16_t>(((w0 >> 21) & 0x4u) | ((w1 << 2) & 0x18u) | ((w1 >> 18) & 0x1u) | ((w1 >> 27) & 0x2u));

    // Outputs are written only after every field has been decoded.
    outD = dVal;
    outRW = rwVal;
    outRX = rxVal;
    outRY = ryVal;
    outRZ = rzVal;
    outGW = gwVal;
    outGX = gxVal;
    outGY = gyVal;
    outGZ = gzVal;
    outBW = bwVal;
    outBX = bxVal;
    outBY = byVal;
    outBZ = bzVal;
}
void ReadMode7(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
{
    // Unpacks the 13 fields of the mode-7 layout from encoded[0..2].
    // Fields that are scattered over several bit ranges are reassembled by
    // OR-ing the individually shifted and masked pieces.
    const uint32_t w0 = encoded[0];
    const uint32_t w1 = encoded[1];
    const uint32_t w2 = encoded[2];

    const uint16_t dVal = static_cast<uint16_t>((w2 >> 13) & 0x1fu);
    const uint16_t rwVal = static_cast<uint16_t>((w0 >> 5) & 0xffu);
    const uint16_t rxVal = static_cast<uint16_t>((w1 >> 3) & 0x1fu);
    const uint16_t ryVal = static_cast<uint16_t>((w2 >> 1) & 0x1fu);
    const uint16_t rzVal = static_cast<uint16_t>((w2 >> 7) & 0x1fu);
    const uint16_t gwVal = static_cast<uint16_t>((w0 >> 15) & 0xffu);
    const uint16_t gxVal = static_cast<uint16_t>((w1 >> 13) & 0x3fu);
    const uint16_t gyVal = static_cast<uint16_t>(((w0 >> 18) & 0x20u) | ((w0 >> 20) & 0x10u) | ((w1 >> 9) & 0xfu));
    const uint16_t gzVal = static_cast<uint16_t>(((w1 << 4) & 0x20u) | ((w1 >> 4) & 0x10u) | ((w1 >> 19) & 0xfu));
    const uint16_t bwVal = static_cast<uint16_t>(((w0 >> 25) & 0x7fu) | ((w1 << 7) & 0x80u));
    const uint16_t bxVal = static_cast<uint16_t>((w1 >> 23) & 0x1fu);
    const uint16_t byVal = static_cast<uint16_t>(((w0 >> 10) & 0x10u) | ((w1 >> 29) & 0x7u) | ((w2 << 3) & 0x8u));
    const uint16_t bzVal = static_cast<uint16_t>(((w0 >> 13) & 0x1u) | ((w1 << 2) & 0x10u) | ((w1 >> 27) & 0x2u) | ((w2 >> 4) & 0x4u) | ((w2 >> 9) & 0x8u));

    // Outputs are written only after every field has been decoded.
    outD = dVal;
    outRW = rwVal;
    outRX = rxVal;
    outRY = ryVal;
    outRZ = rzVal;
    outGW = gwVal;
    outGX = gxVal;
    outGY = gyVal;
    outGZ = gzVal;
    outBW = bwVal;
    outBX = bxVal;
    outBY = byVal;
    outBZ = bzVal;
}
void ReadMode8(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
{
    // Unpacks the 13 fields of the mode-8 layout from encoded[0..2].
    // Fields that are scattered over several bit ranges are reassembled by
    // OR-ing the individually shifted and masked pieces.
    const uint32_t w0 = encoded[0];
    const uint32_t w1 = encoded[1];
    const uint32_t w2 = encoded[2];

    const uint16_t dVal = static_cast<uint16_t>((w2 >> 13) & 0x1fu);
    const uint16_t rwVal = static_cast<uint16_t>((w0 >> 5) & 0xffu);
    const uint16_t rxVal = static_cast<uint16_t>((w1 >> 3) & 0x1fu);
    const uint16_t ryVal = static_cast<uint16_t>((w2 >> 1) & 0x1fu);
    const uint16_t rzVal = static_cast<uint16_t>((w2 >> 7) & 0x1fu);
    const uint16_t gwVal = static_cast<uint16_t>((w0 >> 15) & 0xffu);
    const uint16_t gxVal = static_cast<uint16_t>((w1 >> 13) & 0x1fu);
    const uint16_t gyVal = static_cast<uint16_t>(((w0 >> 20) & 0x10u) | ((w1 >> 9) & 0xfu));
    const uint16_t gzVal = static_cast<uint16_t>(((w1 >> 4) & 0x10u) | ((w1 >> 19) & 0xfu));
    const uint16_t bwVal = static_cast<uint16_t>(((w0 >> 25) & 0x7fu) | ((w1 << 7) & 0x80u));
    const uint16_t bxVal = static_cast<uint16_t>((w1 >> 23) & 0x3fu);
    const uint16_t byVal = static_cast<uint16_t>(((w0 >> 10) & 0x10u) | ((w0 >> 18) & 0x20u) | ((w1 >> 29) & 0x7u) | ((w2 << 3) & 0x8u));
    const uint16_t bzVal = static_cast<uint16_t>(((w0 >> 12) & 0x2u) | ((w1 << 4) & 0x20u) | ((w1 << 2) & 0x10u) | ((w1 >> 18) & 0x1u) | ((w2 >> 4) & 0x4u) | ((w2 >> 9) & 0x8u));

    // Outputs are written only after every field has been decoded.
    outD = dVal;
    outRW = rwVal;
    outRX = rxVal;
    outRY = ryVal;
    outRZ = rzVal;
    outGW = gwVal;
    outGX = gxVal;
    outGY = gyVal;
    outGZ = gzVal;
    outBW = bwVal;
    outBX = bxVal;
    outBY = byVal;
    outBZ = bzVal;
}
void ReadMode9(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
{
    // Unpacks the 13 fields of the mode-9 layout from encoded[0..2].
    // Fields that are scattered over several bit ranges are reassembled by
    // OR-ing the individually shifted and masked pieces.
    const uint32_t w0 = encoded[0];
    const uint32_t w1 = encoded[1];
    const uint32_t w2 = encoded[2];

    const uint16_t dVal = static_cast<uint16_t>((w2 >> 13) & 0x1fu);
    const uint16_t rwVal = static_cast<uint16_t>((w0 >> 5) & 0x3fu);
    const uint16_t rxVal = static_cast<uint16_t>((w1 >> 3) & 0x3fu);
    const uint16_t ryVal = static_cast<uint16_t>((w2 >> 1) & 0x3fu);
    const uint16_t rzVal = static_cast<uint16_t>((w2 >> 7) & 0x3fu);
    const uint16_t gwVal = static_cast<uint16_t>((w0 >> 15) & 0x3fu);
    const uint16_t gxVal = static_cast<uint16_t>((w1 >> 13) & 0x3fu);
    const uint16_t gyVal = static_cast<uint16_t>(((w0 >> 16) & 0x20u) | ((w0 >> 20) & 0x10u) | ((w1 >> 9) & 0xfu));
    const uint16_t gzVal = static_cast<uint16_t>(((w0 >> 7) & 0x10u) | ((w0 >> 26) & 0x20u) | ((w1 >> 19) & 0xfu));
    const uint16_t bwVal = static_cast<uint16_t>((w0 >> 25) & 0x3fu);
    const uint16_t bxVal = static_cast<uint16_t>((w1 >> 23) & 0x3fu);
    const uint16_t byVal = static_cast<uint16_t>(((w0 >> 10) & 0x10u) | ((w0 >> 17) & 0x20u) | ((w1 >> 29) & 0x7u) | ((w2 << 3) & 0x8u));
    const uint16_t bzVal = static_cast<uint16_t>(((w0 >> 12) & 0x3u) | ((w0 >> 21) & 0x4u) | ((w1 << 3) & 0x8u) | ((w1 << 4) & 0x20u) | ((w1 << 2) & 0x10u));

    // Outputs are written only after every field has been decoded.
    outD = dVal;
    outRW = rwVal;
    outRX = rxVal;
    outRY = ryVal;
    outRZ = rzVal;
    outGW = gwVal;
    outGX = gxVal;
    outGY = gyVal;
    outGZ = gzVal;
    outBW = bwVal;
    outBX = bxVal;
    outBY = byVal;
    outBZ = bzVal;
}
void ReadMode10(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
{
    // Unpacks the mode-10 layout from encoded[0..2]. Only rw/gw/bw and
    // rx/gx/bx are stored in this layout; every other output is set to zero.
    const uint32_t w0 = encoded[0];
    const uint32_t w1 = encoded[1];
    const uint32_t w2 = encoded[2];

    const uint16_t rwVal = static_cast<uint16_t>((w0 >> 5) & 0x3ffu);
    const uint16_t rxVal = static_cast<uint16_t>((w1 >> 3) & 0x3ffu);
    const uint16_t gwVal = static_cast<uint16_t>((w0 >> 15) & 0x3ffu);
    const uint16_t gxVal = static_cast<uint16_t>((w1 >> 13) & 0x3ffu);
    // bw straddles words 0 and 1; bx straddles words 1 and 2.
    const uint16_t bwVal = static_cast<uint16_t>(((w0 >> 25) & 0x7fu) | ((w1 << 7) & 0x380u));
    const uint16_t bxVal = static_cast<uint16_t>(((w1 >> 23) & 0x1ffu) | ((w2 << 9) & 0x200u));

    outD = 0;
    outRW = rwVal;
    outRX = rxVal;
    outRY = 0;
    outRZ = 0;
    outGW = gwVal;
    outGX = gxVal;
    outGY = 0;
    outGZ = 0;
    outBW = bwVal;
    outBX = bxVal;
    outBY = 0;
    outBZ = 0;
}
void ReadMode11(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
{
    // Unpacks the mode-11 layout from encoded[0..2]. Only rw/gw/bw and
    // rx/gx/bx are stored in this layout; every other output is set to zero.
    const uint32_t w0 = encoded[0];
    const uint32_t w1 = encoded[1];
    const uint32_t w2 = encoded[2];

    // Bit 10 of each base value is stored separately from its low ten bits.
    const uint16_t rwVal = static_cast<uint16_t>(((w0 >> 5) & 0x3ffu) | ((w1 >> 2) & 0x400u));
    const uint16_t rxVal = static_cast<uint16_t>((w1 >> 3) & 0x1ffu);
    const uint16_t gwVal = static_cast<uint16_t>(((w0 >> 15) & 0x3ffu) | ((w1 >> 12) & 0x400u));
    const uint16_t gxVal = static_cast<uint16_t>((w1 >> 13) & 0x1ffu);
    const uint16_t bwVal = static_cast<uint16_t>(((w0 >> 25) & 0x7fu) | ((w1 << 7) & 0x380u) | ((w2 << 10) & 0x400u));
    const uint16_t bxVal = static_cast<uint16_t>((w1 >> 23) & 0x1ffu);

    outD = 0;
    outRW = rwVal;
    outRX = rxVal;
    outRY = 0;
    outRZ = 0;
    outGW = gwVal;
    outGX = gxVal;
    outGY = 0;
    outGZ = 0;
    outBW = bwVal;
    outBX = bxVal;
    outBY = 0;
    outBZ = 0;
}
void ReadMode12(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
{
    // Unpacks the mode-12 layout from encoded[0..2]. Only rw/gw/bw and
    // rx/gx/bx are stored in this layout; every other output is set to zero.
    const uint32_t w0 = encoded[0];
    const uint32_t w1 = encoded[1];
    const uint32_t w2 = encoded[2];

    // Bits 10 and 11 of each base value are stored separately from its low
    // ten bits, one bit per shift/mask pair.
    const uint16_t rwVal = static_cast<uint16_t>(((w0 >> 5) & 0x3ffu) | (w1 & 0x800u) | ((w1 >> 2) & 0x400u));
    const uint16_t rxVal = static_cast<uint16_t>((w1 >> 3) & 0xffu);
    const uint16_t gwVal = static_cast<uint16_t>(((w0 >> 15) & 0x3ffu) | ((w1 >> 10) & 0x800u) | ((w1 >> 12) & 0x400u));
    const uint16_t gxVal = static_cast<uint16_t>((w1 >> 13) & 0xffu);
    const uint16_t bwVal = static_cast<uint16_t>(((w0 >> 25) & 0x7fu) | ((w1 << 7) & 0x380u) | ((w1 >> 20) & 0x800u) | ((w2 << 10) & 0x400u));
    const uint16_t bxVal = static_cast<uint16_t>((w1 >> 23) & 0xffu);

    outD = 0;
    outRW = rwVal;
    outRX = rxVal;
    outRY = 0;
    outRZ = 0;
    outGW = gwVal;
    outGX = gxVal;
    outGY = 0;
    outGZ = 0;
    outBW = bwVal;
    outBX = bxVal;
    outBY = 0;
    outBZ = 0;
}
void ReadMode13(const uint32_t *encoded, uint16_t &outD, uint16_t &outRW, uint16_t &outRX, uint16_t &outRY, uint16_t &outRZ, uint16_t &outGW, uint16_t &outGX, uint16_t &outGY, uint16_t &outGZ, uint16_t &outBW, uint16_t &outBX, uint16_t &outBY, uint16_t &outBZ)
{
    // Unpacks the mode-13 layout from encoded[0..2]. Only rw/gw/bw and
    // rx/gx/bx are stored in this layout; every other output is set to zero.
    const uint32_t w0 = encoded[0];
    const uint32_t w1 = encoded[1];
    const uint32_t w2 = encoded[2];

    // The low ten bits of each base value come from word 0 (plus word 1 for
    // bw); bits 15..10 are gathered one bit at a time from words 1 and 2.
    const uint16_t rwVal = static_cast<uint16_t>(
        ((w0 >> 5) & 0x3ffu)
        | ((w1 << 8) & 0x8000u)
        | ((w1 << 6) & 0x4000u)
        | ((w1 << 4) & 0x2000u)
        | ((w1 << 2) & 0x1000u)
        | (w1 & 0x800u)
        | ((w1 >> 2) & 0x400u));
    const uint16_t rxVal = static_cast<uint16_t>((w1 >> 3) & 0xfu);
    const uint16_t gwVal = static_cast<uint16_t>(
        ((w0 >> 15) & 0x3ffu)
        | ((w1 >> 2) & 0x8000u)
        | ((w1 >> 4) & 0x4000u)
        | ((w1 >> 6) & 0x2000u)
        | ((w1 >> 8) & 0x1000u)
        | ((w1 >> 10) & 0x800u)
        | ((w1 >> 12) & 0x400u));
    const uint16_t gxVal = static_cast<uint16_t>((w1 >> 13) & 0xfu);
    const uint16_t bwVal = static_cast<uint16_t>(
        ((w0 >> 25) & 0x7fu)
        | ((w1 << 7) & 0x380u)
        | ((w1 >> 12) & 0x8000u)
        | ((w1 >> 14) & 0x4000u)
        | ((w1 >> 16) & 0x2000u)
        | ((w1 >> 18) & 0x1000u)
        | ((w1 >> 20) & 0x800u)
        | ((w2 << 10) & 0x400u));
    const uint16_t bxVal = static_cast<uint16_t>((w1 >> 23) & 0xfu);

    outD = 0;
    outRW = rwVal;
    outRX = rxVal;
    outRY = 0;
    outRZ = 0;
    outGW = gwVal;
    outGX = gxVal;
    outGY = 0;
    outGZ = 0;
    outBW = bwVal;
    outBX = bxVal;
    outBY = 0;
    outBZ = 0;
}
// Reader dispatch table: entry N is ReadModeN, so the array index is the
// same number that appears in the function name.
const ReadFunc_t g_readFuncs[14] =
{
    &ReadMode0,
    &ReadMode1,
    &ReadMode2,
    &ReadMode3,
    &ReadMode4,
    &ReadMode5,
    &ReadMode6,
    &ReadMode7,
    &ReadMode8,
    &ReadMode9,
    &ReadMode10,
    &ReadMode11,
    &ReadMode12,
    &ReadMode13,
};
// Writer dispatch table: entry N is WriteModeN, so the array index is the
// same number that appears in the function name.
const WriteFunc_t g_writeFuncs[14] =
{
    &WriteMode0,
    &WriteMode1,
    &WriteMode2,
    &WriteMode3,
    &WriteMode4,
    &WriteMode5,
    &WriteMode6,
    &WriteMode7,
    &WriteMode8,
    &WriteMode9,
    &WriteMode10,
    &WriteMode11,
    &WriteMode12,
    &WriteMode13,
};
}
}
#endif

View file

@ -0,0 +1,16 @@
#pragma once
#include <stdint.h>
#include "ConvectionKernels_BC6H_IO.h"
namespace cvtt
{
    namespace BC6H_IO
    {
        // Signature shared by every per-mode reader: takes the packed 32-bit
        // words and returns the 13 unpacked fields through the references.
        typedef void (*ReadFunc_t)(
            const uint32_t *encoded,
            uint16_t &d,
            uint16_t &rw, uint16_t &rx, uint16_t &ry, uint16_t &rz,
            uint16_t &gw, uint16_t &gx, uint16_t &gy, uint16_t &gz,
            uint16_t &bw, uint16_t &bx, uint16_t &by, uint16_t &bz);

        // Signature shared by every per-mode writer: takes the mode value `m`
        // plus the 13 fields and stores the packed result through `encoded`.
        typedef void (*WriteFunc_t)(
            uint32_t *encoded,
            uint16_t m,
            uint16_t d,
            uint16_t rw, uint16_t rx, uint16_t ry, uint16_t rz,
            uint16_t gw, uint16_t gx, uint16_t gy, uint16_t gz,
            uint16_t bw, uint16_t bx, uint16_t by, uint16_t bz);

        // Per-mode dispatch tables with 14 entries each (defined in the
        // matching .cpp file).
        extern const ReadFunc_t g_readFuncs[14];
        extern const WriteFunc_t g_writeFuncs[14];
    }
}

View file

@ -0,0 +1,17 @@
#pragma once
#include <stdint.h>
namespace cvtt
{
    namespace Tables
    {
        namespace BC7Prio
        {
            // Packed 16-bit priority codes and their counts, one list for
            // RGB and one for RGBA. Defined in another translation unit.
            extern const uint16_t *g_bc7PrioCodesRGB;
            extern const int g_bc7NumPrioCodesRGB;

            extern const uint16_t *g_bc7PrioCodesRGBA;
            extern const int g_bc7NumPrioCodesRGBA;

            // Accessors that pull one component out of a packed 16-bit code.
            // NOTE(review): the bit layout lives with the definitions, which
            // are not visible from this header.
            int UnpackMode(uint16_t packed);
            int UnpackSeedPointCount(uint16_t packed);
            int UnpackPartition(uint16_t packed);
            int UnpackRotation(uint16_t packed);
            int UnpackIndexSelector(uint16_t packed);
        }
    }
}

File diff suppressed because it is too large Load diff

View file

@ -1,6 +1,8 @@
#pragma once #pragma once
#include <stdint.h> #include <stdint.h>
// This file is generated by the MakeTables app. Do not edit this file manually.
namespace cvtt { namespace Tables { namespace BC7SC { namespace cvtt { namespace Tables { namespace BC7SC {
struct TableEntry struct TableEntry

View file

@ -0,0 +1,46 @@
/*
Convection Texture Tools
Copyright (c) 2018-2019 Eric Lasota
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject
to the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------------------
Portions based on DirectX Texture Library (DirectXTex)
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
http://go.microsoft.com/fwlink/?LinkId=248926
*/
#include "ConvectionKernels_Config.h"
#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)
#include "ConvectionKernels_BCCommon.h"
// Returns the number of tweak rounds to use for the given range:
// 3 when the range is exactly 3, otherwise 4.
int cvtt::Internal::BCCommon::TweakRoundsForRange(int range)
{
    return (range == 3) ? 3 : 4;
}
#endif

View file

@ -0,0 +1,104 @@
#pragma once
#ifndef __CVTT_BCCOMMON_H__
#define __CVTT_BCCOMMON_H__
#include "ConvectionKernels_AggregatedError.h"
#include "ConvectionKernels_ParallelMath.h"
namespace cvtt
{
namespace Internal
{
class BCCommon
{
public:
typedef ParallelMath::Float MFloat;
typedef ParallelMath::UInt16 MUInt16;
typedef ParallelMath::UInt15 MUInt15;
typedef ParallelMath::AInt16 MAInt16;
typedef ParallelMath::SInt16 MSInt16;
typedef ParallelMath::SInt32 MSInt32;
static int TweakRoundsForRange(int range);
template<int TVectorSize>
static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, AggregatedError<TVectorSize> &aggError)
{
for (int ch = 0; ch < numRealChannels; ch++)
aggError.Add(ParallelMath::SqDiffUInt8(reconstructed[ch], original[ch]), ch);
}
template<int TVectorSize>
static void ComputeErrorLDR(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], AggregatedError<TVectorSize> &aggError)
{
ComputeErrorLDR<TVectorSize>(flags, reconstructed, original, TVectorSize, aggError);
}
template<int TVectorSize>
static MFloat ComputeErrorLDRSimple(uint32_t flags, const MUInt15 reconstructed[TVectorSize], const MUInt15 original[TVectorSize], int numRealChannels, const float *channelWeightsSq)
{
AggregatedError<TVectorSize> aggError;
ComputeErrorLDR<TVectorSize>(flags, reconstructed, original, numRealChannels, aggError);
return aggError.Finalize(flags, channelWeightsSq);
}
template<int TVectorSize>
static MFloat ComputeErrorHDRFast(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize])
{
MFloat error = ParallelMath::MakeFloatZero();
if (flags & Flags::Uniform)
{
for (int ch = 0; ch < TVectorSize; ch++)
error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]);
}
else
{
for (int ch = 0; ch < TVectorSize; ch++)
error = error + ParallelMath::SqDiffSInt16(reconstructed[ch], original[ch]) * ParallelMath::MakeFloat(channelWeightsSq[ch]);
}
return error;
}
template<int TVectorSize>
static MFloat ComputeErrorHDRSlow(uint32_t flags, const MSInt16 reconstructed[TVectorSize], const MSInt16 original[TVectorSize], const float channelWeightsSq[TVectorSize])
{
MFloat error = ParallelMath::MakeFloatZero();
if (flags & Flags::Uniform)
{
for (int ch = 0; ch < TVectorSize; ch++)
error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]);
}
else
{
for (int ch = 0; ch < TVectorSize; ch++)
error = error + ParallelMath::SqDiff2CL(reconstructed[ch], original[ch]) * ParallelMath::MakeFloat(channelWeightsSq[ch]);
}
return error;
}
template<int TChannelCount>
static void PreWeightPixelsLDR(MFloat preWeightedPixels[16][TChannelCount], const MUInt15 pixels[16][TChannelCount], const float channelWeights[TChannelCount])
{
for (int px = 0; px < 16; px++)
{
for (int ch = 0; ch < TChannelCount; ch++)
preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch];
}
}
template<int TChannelCount>
static void PreWeightPixelsHDR(MFloat preWeightedPixels[16][TChannelCount], const MSInt16 pixels[16][TChannelCount], const float channelWeights[TChannelCount])
{
for (int px = 0; px < 16; px++)
{
for (int ch = 0; ch < TChannelCount; ch++)
preWeightedPixels[px][ch] = ParallelMath::ToFloat(pixels[px][ch]) * channelWeights[ch];
}
}
};
}
}
#endif

View file

@ -0,0 +1,12 @@
#pragma once
#ifndef __CVTT_CONFIG_H__
#define __CVTT_CONFIG_H__
// Define CVTT_USE_SSE2 when the compiler reports SSE2 support:
//  - _M_IX86_FP >= 2 : MSVC 32-bit x86 built with /arch:SSE2 or higher
//  - _M_X64          : MSVC x64 target
//  - __SSE2__        : defined by GCC/Clang when SSE2 is enabled
// NOTE(review): presumably this selects the SSE2 code paths elsewhere in the
// library; the consumers are not visible from this header, so confirm there.
#if (defined(_M_IX86_FP) && _M_IX86_FP >= 2) || defined(_M_X64) || defined(__SSE2__)
#define CVTT_USE_SSE2
#endif
// Define this to compile everything as a single source file
//#define CVTT_SINGLE_FILE
#endif

3147
thirdparty/cvtt/ConvectionKernels_ETC.cpp vendored Normal file

File diff suppressed because it is too large Load diff

126
thirdparty/cvtt/ConvectionKernels_ETC.h vendored Normal file
View file

@ -0,0 +1,126 @@
#pragma once
#ifndef __CVTT_CONVECTIONKERNELS_ETC_H__
#define __CVTT_CONVECTIONKERNELS_ETC_H__
#include "ConvectionKernels.h"
#include "ConvectionKernels_ParallelMath.h"
namespace cvtt
{
struct Options;
namespace Internal
{
// Static-only helper class declaring the ETC1 / ETC2 / EAC block encoders
// and the scratch-data types they use. Only declarations appear here; the
// definitions are expected to live in the matching source file.
class ETCComputer
{
public:
// Block encoders. Each writes encoded data to outputBuffer from the blocks
// pointed to by inputBlocks.
// NOTE(review): the number of input blocks consumed per call and the number
// of bytes written are not stated by these declarations - confirm against
// the definitions before relying on a particular size.
static void CompressETC1Block(uint8_t *outputBuffer, const PixelBlockU8 *inputBlocks, ETC1CompressionData *compressionData, const Options &options);
static void CompressETC2Block(uint8_t *outputBuffer, const PixelBlockU8 *inputBlocks, ETC2CompressionData *compressionData, const Options &options, bool punchthroughAlpha);
static void CompressETC2AlphaBlock(uint8_t *outputBuffer, const PixelBlockU8 *inputBlocks, const Options &options);
static void CompressEACBlock(uint8_t *outputBuffer, const PixelBlockScalarS16 *inputBlocks, bool isSigned, const Options &options);
// Allocation / release pairs for the opaque per-encoder scratch data.
// Memory is obtained and returned through caller-supplied callbacks; the
// context pointer is passed at allocation time only.
static ETC2CompressionData *AllocETC2Data(cvtt::Kernels::allocFunc_t allocFunc, void *context, const cvtt::Options &options);
static void ReleaseETC2Data(ETC2CompressionData *compressionData, cvtt::Kernels::freeFunc_t freeFunc);
static ETC1CompressionData *AllocETC1Data(cvtt::Kernels::allocFunc_t allocFunc, void *context);
static void ReleaseETC1Data(ETC1CompressionData *compressionData, cvtt::Kernels::freeFunc_t freeFunc);
private:
// Short aliases for the ParallelMath value types used throughout the encoder.
typedef ParallelMath::Float MFloat;
typedef ParallelMath::SInt16 MSInt16;
typedef ParallelMath::UInt15 MUInt15;
typedef ParallelMath::UInt16 MUInt16;
typedef ParallelMath::SInt32 MSInt32;
typedef ParallelMath::UInt31 MUInt31;
// Scratch storage for candidate encodings; every array has a leading
// dimension of 2 and room for MaxAttemptsPerSector candidates.
// NOTE(review): the leading dimension presumably indexes the two sectors
// (half-blocks) of a block - confirm.
struct DifferentialResolveStorage
{
// NOTE(review): 57 + 7 * 81 matches the per-row entry counts that lead each
// row of Tables::ETC1::g_potentialOffsets4 - confirm that this is the
// intended relationship before changing either side.
static const unsigned int MaxAttemptsPerSector = 57 + 81 + 81 + 81 + 81 + 81 + 81 + 81;
MUInt15 diffNumAttempts[2];
MFloat diffErrors[2][MaxAttemptsPerSector];
MUInt16 diffSelectors[2][MaxAttemptsPerSector];
MUInt15 diffColors[2][MaxAttemptsPerSector];
MUInt15 diffTables[2][MaxAttemptsPerSector];
uint16_t attemptSortIndexes[2][MaxAttemptsPerSector];
};
// Scratch storage passed to EncodeHMode: up to 62 candidate entries, with a
// per-pixel (16) error value for each.
// NOTE(review): the origin of the constant 62 is not visible here.
struct HModeEval
{
MFloat errors[62][16];
MUInt16 signBits[62];
MUInt15 uniqueQuantizedColors[62];
MUInt15 numUniqueColors[2];
};
// Concrete type behind the public ETC1CompressionData handle: differential
// scratch storage plus the caller-supplied context pointer.
struct ETC1CompressionDataInternal : public cvtt::ETC1CompressionData
{
explicit ETC1CompressionDataInternal(void *context)
: m_context(context)
{
}
DifferentialResolveStorage m_drs;
void *m_context;
};
// Concrete type behind the public ETC2CompressionData handle. In addition to
// the ETC1-style storage it carries H-mode scratch data and two 3-component
// chroma axes. The constructor is defined out of line.
struct ETC2CompressionDataInternal : public cvtt::ETC2CompressionData
{
explicit ETC2CompressionDataInternal(void *context, const cvtt::Options &options);
HModeEval m_h;
DifferentialResolveStorage m_drs;
void *m_context;
float m_chromaSideAxis0[3];
float m_chromaSideAxis1[3];
};
// Error metrics between a reconstructed color and a target pixel.
static MFloat ComputeErrorUniform(const MUInt15 pixelA[3], const MUInt15 pixelB[3]);
// NOTE(review): this is the only helper that takes Options by value instead of
// by const reference. Switching it to a reference would also require changing
// the out-of-line definition, so it is left as declared.
static MFloat ComputeErrorWeighted(const MUInt15 reconstructed[3], const MFloat pixelB[3], const Options options);
static MFloat ComputeErrorFakeBT709(const MUInt15 reconstructed[3], const MFloat pixelB[3]);
// Half-block (8 pixel) candidate evaluation; results are returned through
// outError / outSelectors.
static void TestHalfBlock(MFloat &outError, MUInt16 &outSelectors, MUInt15 quantizedPackedColor, const MUInt15 pixels[8][3], const MFloat preWeightedPixels[8][3], const MSInt16 modifiers[4], bool isDifferential, const Options &options);
static void TestHalfBlockPunchthrough(MFloat &outError, MUInt16 &outSelectors, MUInt15 quantizedPackedColor, const MUInt15 pixels[8][3], const MFloat preWeightedPixels[8][3], const ParallelMath::Int16CompFlag isTransparent[8], const MUInt15 modifier, const Options &options);
// Searches the candidates stored in drs and updates the best* in/out
// parameters.
static void FindBestDifferentialCombination(int flip, int d, const ParallelMath::Int16CompFlag canIgnoreSector[2], ParallelMath::Int16CompFlag& bestIsThisMode, MFloat& bestTotalError, MUInt15& bestFlip, MUInt15& bestD, MUInt15 bestColors[2], MUInt16 bestSelectors[2], MUInt15 bestTables[2], DifferentialResolveStorage &drs);
// Legality predicates for differential color pairs, in vector (ParallelMath)
// and scalar flavors.
static ParallelMath::Int16CompFlag ETCDifferentialIsLegalForChannel(const MUInt15 &a, const MUInt15 &b);
static ParallelMath::Int16CompFlag ETCDifferentialIsLegal(const MUInt15 &a, const MUInt15 &b);
static bool ETCDifferentialIsLegalForChannelScalar(const uint16_t &a, const uint16_t &b);
static bool ETCDifferentialIsLegalScalar(const uint16_t &a, const uint16_t &b);
// Per-mode encoders. Each takes the running bestError by reference and an
// output buffer for the encoded block.
static void EncodeTMode(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag isIsolated[16], const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const Options &options);
static void EncodeHMode(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag groupings[16], const MUInt15 pixels[16][3], HModeEval &he, const MFloat preWeightedPixels[16][3], const Options &options);
static void EncodeVirtualTModePunchthrough(uint8_t *outputBuffer, MFloat &bestError, const ParallelMath::Int16CompFlag isIsolated[16], const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const ParallelMath::Int16CompFlag isTransparent[16], const ParallelMath::Int16CompFlag& anyTransparent, const ParallelMath::Int16CompFlag& allTransparent, const Options &options);
static MUInt15 DecodePlanarCoeff(const MUInt15 &coeff, int ch);
static void EncodePlanar(uint8_t *outputBuffer, MFloat &bestError, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const Options &options);
// Internal whole-block drivers operating on already-extracted pixel data.
static void CompressETC1BlockInternal(MFloat &bestTotalError, uint8_t *outputBuffer, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], DifferentialResolveStorage& compressionData, const Options &options, bool punchthrough);
static void CompressETC1PunchthroughBlockInternal(MFloat &bestTotalError, uint8_t *outputBuffer, const MUInt15 pixels[16][3], const MFloat preWeightedPixels[16][3], const ParallelMath::Int16CompFlag isTransparent[16], DifferentialResolveStorage& compressionData, const Options &options);
static void CompressETC2AlphaBlockInternal(uint8_t *outputBuffer, const MUInt15 pixels[16], bool is11Bit, bool isSigned, const Options &options);
// Fills pixels / preWeightedPixels (16 pixels x 3 channels) from the input
// blocks.
static void ExtractBlocks(MUInt15 pixels[16][3], MFloat preWeightedPixels[16][3], const PixelBlockU8 *inputBlocks, const Options &options);
// Quantization helpers for the "fake BT.709" error metric.
static void ResolveHalfBlockFakeBT709RoundingAccurate(MUInt15 quantized[3], const MUInt15 sectorCumulative[3], bool isDifferential);
static void ResolveHalfBlockFakeBT709RoundingFast(MUInt15 quantized[3], const MUInt15 sectorCumulative[3], bool isDifferential);
static void ResolveTHFakeBT709Rounding(MUInt15 quantized[3], const MUInt15 target[3], const MUInt15 &granularity);
// Color-space conversions; the three ConvertToFakeBT709 overloads differ only
// in how the source color is supplied.
static void ConvertToFakeBT709(MFloat yuv[3], const MUInt15 color[3]);
static void ConvertToFakeBT709(MFloat yuv[3], const MFloat color[3]);
static void ConvertToFakeBT709(MFloat yuv[3], const MFloat &r, const MFloat &g, const MFloat &b);
static void ConvertFromFakeBT709(MFloat rgb[3], const MFloat yuv[3]);
static void QuantizeETC2Alpha(int tableIndex, const MUInt15& value, const MUInt15& baseValue, const MUInt15& multiplier, bool is11Bit, bool isSigned, MUInt15& outIndexes, MUInt15& outQuantizedValues);
// Bit-packing emitters that take scalar values and write one encoded block
// to outputBuffer.
static void EmitTModeBlock(uint8_t *outputBuffer, const ParallelMath::ScalarUInt16 lineColor[3], const ParallelMath::ScalarUInt16 isolatedColor[3], int32_t packedSelectors, ParallelMath::ScalarUInt16 table, bool opaque);
static void EmitHModeBlock(uint8_t *outputBuffer, const ParallelMath::ScalarUInt16 blockColors[2], ParallelMath::ScalarUInt16 sectorBits, ParallelMath::ScalarUInt16 signBits, ParallelMath::ScalarUInt16 table, bool opaque);
static void EmitETC1Block(uint8_t *outputBuffer, int blockBestFlip, int blockBestD, const int blockBestColors[2][3], const int blockBestTables[2], const ParallelMath::ScalarUInt16 blockBestSelectors[2], bool transparent);
// Lookup table with dimensions [2][2][8], defined out of line.
// NOTE(review): presumably [flip][sector][pixel-in-sector] -> pixel index;
// confirm against the definition.
static const int g_flipTables[2][2][8];
};
}
}
#endif

View file

@ -0,0 +1,29 @@
#include <stdint.h>
namespace cvtt
{
namespace Tables
{
// Constant lookup tables used by the ETC1 encoder.
namespace ETC1
{
// Flattened, variable-length rows. Each row below starts with its own entry
// count (57 for the first row, 81 for the remaining seven) followed by that
// many signed offsets in ascending order, symmetric around 0.
// NOTE(review): presumably one row per ETC1 modifier table index (8 rows);
// confirm against the code that walks this array.
const int16_t g_potentialOffsets4[] =
{
57, -64, -58, -54, -52, -48, -46, -44, -42, -40, -38, -36, -34, -32, -30, -28, -26, -24, -22, -20, -18, -16, -14, -12, -10, -8, -6, -4, -2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 52, 54, 58, 64,
81, -136, -124, -114, -112, -102, -100, -92, -90, -88, -80, -78, -76, -70, -68, -66, -64, -58, -56, -54, -52, -48, -46, -44, -42, -40, -36, -34, -32, -30, -26, -24, -22, -20, -18, -14, -12, -10, -8, -4, -2, 0, 2, 4, 8, 10, 12, 14, 18, 20, 22, 24, 26, 30, 32, 34, 36, 40, 42, 44, 46, 48, 52, 54, 56, 58, 64, 66, 68, 70, 76, 78, 80, 88, 90, 92, 100, 102, 112, 114, 124, 136,
81, -232, -212, -194, -192, -174, -172, -156, -154, -152, -136, -134, -132, -118, -116, -114, -112, -98, -96, -94, -92, -80, -78, -76, -74, -72, -60, -58, -56, -54, -42, -40, -38, -36, -34, -22, -20, -18, -16, -4, -2, 0, 2, 4, 16, 18, 20, 22, 34, 36, 38, 40, 42, 54, 56, 58, 60, 72, 74, 76, 78, 80, 92, 94, 96, 98, 112, 114, 116, 118, 132, 134, 136, 152, 154, 156, 172, 174, 192, 194, 212, 232,
81, -336, -307, -281, -278, -252, -249, -226, -223, -220, -197, -194, -191, -171, -168, -165, -162, -142, -139, -136, -133, -116, -113, -110, -107, -104, -87, -84, -81, -78, -61, -58, -55, -52, -49, -32, -29, -26, -23, -6, -3, 0, 3, 6, 23, 26, 29, 32, 49, 52, 55, 58, 61, 78, 81, 84, 87, 104, 107, 110, 113, 116, 133, 136, 139, 142, 162, 165, 168, 171, 191, 194, 197, 220, 223, 226, 249, 252, 278, 281, 307, 336,
81, -480, -438, -402, -396, -360, -354, -324, -318, -312, -282, -276, -270, -246, -240, -234, -228, -204, -198, -192, -186, -168, -162, -156, -150, -144, -126, -120, -114, -108, -90, -84, -78, -72, -66, -48, -42, -36, -30, -12, -6, 0, 6, 12, 30, 36, 42, 48, 66, 72, 78, 84, 90, 108, 114, 120, 126, 144, 150, 156, 162, 168, 186, 192, 198, 204, 228, 234, 240, 246, 270, 276, 282, 312, 318, 324, 354, 360, 396, 402, 438, 480,
81, -640, -584, -536, -528, -480, -472, -432, -424, -416, -376, -368, -360, -328, -320, -312, -304, -272, -264, -256, -248, -224, -216, -208, -200, -192, -168, -160, -152, -144, -120, -112, -104, -96, -88, -64, -56, -48, -40, -16, -8, 0, 8, 16, 40, 48, 56, 64, 88, 96, 104, 112, 120, 144, 152, 160, 168, 192, 200, 208, 216, 224, 248, 256, 264, 272, 304, 312, 320, 328, 360, 368, 376, 416, 424, 432, 472, 480, 528, 536, 584, 640,
81, -848, -775, -709, -702, -636, -629, -570, -563, -556, -497, -490, -483, -431, -424, -417, -410, -358, -351, -344, -337, -292, -285, -278, -271, -264, -219, -212, -205, -198, -153, -146, -139, -132, -125, -80, -73, -66, -59, -14, -7, 0, 7, 14, 59, 66, 73, 80, 125, 132, 139, 146, 153, 198, 205, 212, 219, 264, 271, 278, 285, 292, 337, 344, 351, 358, 410, 417, 424, 431, 483, 490, 497, 556, 563, 570, 629, 636, 702, 709, 775, 848,
81, -1464, -1328, -1234, -1192, -1098, -1056, -1004, -962, -920, -868, -826, -784, -774, -732, -690, -648, -638, -596, -554, -544, -512, -502, -460, -418, -408, -376, -366, -324, -314, -282, -272, -230, -188, -178, -146, -136, -94, -84, -52, -42, 0, 42, 52, 84, 94, 136, 146, 178, 188, 230, 272, 282, 314, 324, 366, 376, 408, 418, 460, 502, 512, 544, 554, 596, 638, 648, 690, 732, 774, 784, 826, 868, 920, 962, 1004, 1056, 1098, 1192, 1234, 1328, 1464
};
// Largest per-row entry count in g_potentialOffsets4 (the 81-entry rows).
const unsigned int g_maxPotentialOffsets = 81;
// Eight modifier magnitudes in ascending order. The same values appear in
// Tables::ETC2::g_thModifierTable.
const int16_t g_thModifierTable[8] =
{
3, 6, 11, 16, 23, 32, 41, 64
};
}
}
}

View file

@ -0,0 +1,35 @@
#include <stdint.h>
namespace cvtt
{
namespace Tables
{
// Constant lookup tables used by the ETC2 encoder.
namespace ETC2
{
// Eight modifier magnitudes in ascending order. The same values appear in
// Tables::ETC1::g_thModifierTable.
// NOTE(review): presumably the distance table for the T and H modes - confirm.
const int16_t g_thModifierTable[8] =
{
3, 6, 11, 16, 23, 32, 41, 64
};
// 16 rows of 4 non-negative modifiers, each row in ascending order.
// NOTE(review): as the name suggests, this looks like the positive half of
// the alpha modifier table, with the negative half derived by the caller -
// confirm against the code that indexes it.
const int16_t g_alphaModifierTablePositive[16][4] =
{
{ 2, 5, 8, 14, },
{ 2, 6, 9, 12, },
{ 1, 4, 7, 12, },
{ 1, 3, 5, 12, },
{ 2, 5, 7, 11, },
{ 2, 6, 8, 10, },
{ 3, 6, 7, 10, },
{ 2, 4, 7, 10, },
{ 1, 5, 7, 9, },
{ 1, 4, 7, 9, },
{ 1, 3, 7, 9, },
{ 1, 4, 6, 9, },
{ 2, 3, 6, 9, },
{ 0, 1, 2, 9, },
{ 3, 5, 7, 8, },
{ 2, 4, 6, 8, },
};
}
}
}

View file

@ -0,0 +1,27 @@
#pragma once
#include <stdint.h>
// This file is generated by the MakeTables app. Do not edit this file manually.
namespace cvtt { namespace Tables { namespace ETC2 {
const int g_alphaRoundingTableWidth = 13;
const uint8_t g_alphaRoundingTables[16][13] =
{
{ 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 2, 3 },
{ 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3 },
{ 0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3 },
{ 0, 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3 },
{ 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3 },
{ 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3 },
{ 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3 },
{ 0, 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3 },
{ 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3 },
{ 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 },
{ 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 },
{ 0, 0, 0, 1, 1, 1, 2, 2, 3, 3, 3, 3, 3 },
{ 0, 0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3 },
{ 0, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3 },
{ 0, 0, 0, 0, 0, 1, 1, 2, 3, 3, 3, 3, 3 },
{ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 3, 3, 3 },
};
}}}

View file

@ -0,0 +1,181 @@
#pragma once
#ifndef __CVTT_ENDPOINTREFINER_H__
#define __CVTT_ENDPOINTREFINER_H__
#include "ConvectionKernels_ParallelMath.h"
namespace cvtt
{
namespace Internal
{
// Solve for a, b where v = a*t + b
// This allows endpoints to be mapped to where T=0 and T=1
// Least squares from totals:
// a = (tv - t*v/w)/(tt - t*t/w)
// b = (v - a*t)/w
template<int TVectorSize>
class EndpointRefiner
{
public:
    typedef ParallelMath::Float MFloat;
    typedef ParallelMath::UInt16 MUInt16;
    typedef ParallelMath::UInt15 MUInt15;
    typedef ParallelMath::AInt16 MAInt16;
    typedef ParallelMath::SInt16 MSInt16;
    typedef ParallelMath::SInt32 MSInt32;

    // Running least-squares totals (see GetRefinedEndpoints for how they are used).
    MFloat m_tv[TVectorSize];   // sum of t * v per channel
    MFloat m_v[TVectorSize];    // sum of v per channel
    MFloat m_tt;                // sum of (weight *) t * t
    MFloat m_t;                 // sum of (weight *) t
    MFloat m_w;                 // sum of explicit weights (ContributePW)
    int m_wu;                   // count of unweighted contributions

    float m_rcpMaxIndex;                        // 1 / (indexRange - 1)
    float m_channelWeights[TVectorSize];        // copy of the caller's weights
    float m_rcpChannelWeights[TVectorSize];     // 1 / weight, or 1 for a zero weight

    // Clears every accumulator and records the index range and channel weights.
    // A channel weight of exactly zero gets a reciprocal of 1 so that the
    // final "unweight" step never divides by zero.
    void Init(int indexRange, const float channelWeights[TVectorSize])
    {
        m_wu = 0;
        m_rcpMaxIndex = 1.0f / static_cast<float>(indexRange - 1);

        m_tt = ParallelMath::MakeFloatZero();
        m_t = ParallelMath::MakeFloatZero();
        m_w = ParallelMath::MakeFloatZero();

        for (int ch = 0; ch < TVectorSize; ch++)
        {
            m_tv[ch] = ParallelMath::MakeFloatZero();
            m_v[ch] = ParallelMath::MakeFloatZero();

            const float channelWeight = channelWeights[ch];
            m_channelWeights[ch] = channelWeight;
            m_rcpChannelWeights[ch] = (channelWeight != 0.0f) ? (1.0f / channelWeight) : 1.0f;
        }
    }

    // Adds one pre-weighted pixel with an explicit per-pixel weight.
    // The index is normalized to t = index / (indexRange - 1).
    void ContributePW(const MFloat *pwFloatPixel, const MUInt15 &index, const MFloat &weight)
    {
        MFloat param = ParallelMath::ToFloat(index) * m_rcpMaxIndex;

        m_w = m_w + weight;
        m_t = m_t + weight * param;
        m_tt = m_tt + weight * param * param;

        for (int ch = 0; ch < TVectorSize; ch++)
        {
            MFloat weightedValue = pwFloatPixel[ch] * weight;
            m_v[ch] = m_v[ch] + weightedValue;
            m_tv[ch] = m_tv[ch] + param * weightedValue;
        }
    }

    // Adds one pre-weighted pixel with an implicit weight of 1. Only the first
    // numRealChannels channels are accumulated; the rest keep their totals.
    void ContributeUnweightedPW(const MFloat *pwFloatPixel, const MUInt15 &index, int numRealChannels)
    {
        MFloat param = ParallelMath::ToFloat(index) * m_rcpMaxIndex;

        for (int ch = 0; ch < numRealChannels; ch++)
        {
            MFloat value = pwFloatPixel[ch];
            m_v[ch] = m_v[ch] + value;
            m_tv[ch] = m_tv[ch] + param * value;
        }

        m_t = m_t + param;
        m_tt = m_tt + param * param;
        ++m_wu;
    }

    // Convenience overload that accumulates every channel.
    void ContributeUnweightedPW(const MFloat *floatPixel, const MUInt15 &index)
    {
        ContributeUnweightedPW(floatPixel, index, TVectorSize);
    }

    // Solves the accumulated least-squares system for the line v = a*t + b and
    // returns its values at t = 0 (endPoint[0]) and t = 1 (endPoint[1]),
    // divided back by the channel weights:
    //   a = (tv - t*v/w) / (tt - t*t/w)
    //   b = (v - a*t) / w
    // When the slope denominator is exactly zero both endpoints collapse to
    // the mean v / w.
    void GetRefinedEndpoints(MFloat endPoint[2][TVectorSize])
    {
        MFloat totalWeight = m_w + ParallelMath::MakeFloat(static_cast<float>(m_wu));
        ParallelMath::MakeSafeDenominator(totalWeight);
        MFloat rcpTotalWeight = ParallelMath::Reciprocal(totalWeight);

        MFloat slopeDenom = (m_tt * totalWeight - m_t * m_t) * rcpTotalWeight;
        ParallelMath::FloatCompFlag isDegenerate = ParallelMath::Equal(slopeDenom, ParallelMath::MakeFloatZero());
        // Substitute 1 so the division below is well defined; the degenerate
        // lanes are overwritten afterwards.
        ParallelMath::ConditionalSet(slopeDenom, isDegenerate, ParallelMath::MakeFloat(1.0f));

        for (int ch = 0; ch < TVectorSize; ch++)
        {
            MFloat slope = (m_tv[ch] - m_t * m_v[ch] * rcpTotalWeight) / slopeDenom;
            MFloat intercept = (m_v[ch] - slope * m_t) * rcpTotalWeight;

            MFloat low = intercept;
            MFloat high = slope + intercept;

            ParallelMath::ConditionalSet(low, isDegenerate, (m_v[ch] * rcpTotalWeight));
            ParallelMath::ConditionalSet(high, isDegenerate, low);

            // Undo the channel pre-weighting.
            float unweight = m_rcpChannelWeights[ch];
            endPoint[0][ch] = low * unweight;
            endPoint[1][ch] = high * unweight;
        }
    }

    // Refined endpoints clamped to [0, 255] and rounded to integers.
    // Note: numRealChannels is accepted but not used; all TVectorSize channels
    // are converted.
    void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], int numRealChannels, const ParallelMath::RoundTowardNearestForScope *roundingMode)
    {
        MFloat refined[2][TVectorSize];
        GetRefinedEndpoints(refined);

        for (int epi = 0; epi < 2; epi++)
        {
            for (int ch = 0; ch < TVectorSize; ch++)
            {
                endPoint[epi][ch] = ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(refined[epi][ch], 0.0f, 255.0f), roundingMode);
            }
        }
    }

    // Convenience overload that forwards TVectorSize as the channel count.
    void GetRefinedEndpointsLDR(MUInt15 endPoint[2][TVectorSize], const ParallelMath::RoundTowardNearestForScope *roundingMode)
    {
        GetRefinedEndpointsLDR(endPoint, TVectorSize, roundingMode);
    }

    // Refined endpoints for HDR data: clamped to [-31743, 31743] when isSigned,
    // otherwise to [0, 31743], then rounded and stored as MSInt16.
    void GetRefinedEndpointsHDR(MSInt16 endPoint[2][TVectorSize], bool isSigned, const ParallelMath::RoundTowardNearestForScope *roundingMode)
    {
        MFloat refined[2][TVectorSize];
        GetRefinedEndpoints(refined);

        for (int epi = 0; epi < 2; epi++)
        {
            for (int ch = 0; ch < TVectorSize; ch++)
            {
                MFloat value = refined[epi][ch];

                if (!isSigned)
                {
                    endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(value, 0.0f, 31743.0f), roundingMode));
                    continue;
                }

                endPoint[epi][ch] = ParallelMath::LosslessCast<MSInt16>::Cast(ParallelMath::RoundAndConvertToS16(ParallelMath::Clamp(value, -31743.0f, 31743.0f), roundingMode));
            }
        }
    }
};
}
}
#endif

View file

@ -0,0 +1,153 @@
#pragma once
#ifndef __CVTT_ENDPOINTSELECTOR_H__
#define __CVTT_ENDPOINTSELECTOR_H__
#include "ConvectionKernels_ParallelMath.h"
#include "ConvectionKernels_UnfinishedEndpoints.h"
#include "ConvectionKernels_PackedCovarianceMatrix.h"
namespace cvtt
{
namespace Internal
{
static const int NumEndpointSelectorPasses = 3;
// Picks a pair of endpoints for a set of (pre-weighted) vectors in three
// passes over the data:
//   pass 0 - accumulate the weighted centroid,
//   pass 1 - accumulate the covariance around the centroid and estimate its
//            dominant direction with TIterationCount power-iteration steps,
//   pass 2 - project every value onto that direction and track min / max.
// Call ContributePass for every value and then FinishPass, once per pass,
// before calling GetEndpoints.
template<int TVectorSize, int TIterationCount>
class EndpointSelector
{
public:
    typedef ParallelMath::Float MFloat;

    // Starts with zeroed totals and an empty (inverted) min / max range.
    EndpointSelector()
    {
        for (int ch = 0; ch < TVectorSize; ch++)
        {
            m_centroid[ch] = ParallelMath::MakeFloatZero();
            m_direction[ch] = ParallelMath::MakeFloatZero();
        }
        m_weightTotal = ParallelMath::MakeFloatZero();
        m_minDist = ParallelMath::MakeFloat(FLT_MAX);
        m_maxDist = ParallelMath::MakeFloat(-FLT_MAX);
    }

    // Feeds one value into the given pass (0, 1 or 2). The weight is ignored
    // in pass 2; any other pass index is a no-op.
    void ContributePass(const MFloat *value, int pass, const MFloat &weight)
    {
        if (pass == 0)
            ContributeCentroid(value, weight);
        else if (pass == 1)
            ContributeDirection(value, weight);
        else if (pass == 2)
            ContributeMinMax(value);
    }

    // Finalizes pass 0 (centroid) or pass 1 (direction). Pass 2 needs no
    // finalization.
    void FinishPass(int pass)
    {
        if (pass == 0)
            FinishCentroid();
        else if (pass == 1)
            FinishDirection();
    }

    // Returns the selected endpoints as base + offset, with the channel
    // pre-weighting divided back out.
    //
    // A channel weight of exactly zero is treated as 1 for the division.
    // Previously the guarded value was computed but the raw weight was still
    // used as the divisor, so a zero weight divided by zero.
    UnfinishedEndpoints<TVectorSize> GetEndpoints(const float channelWeights[TVectorSize]) const
    {
        MFloat unweightedBase[TVectorSize];
        MFloat unweightedOffset[TVectorSize];

        for (int ch = 0; ch < TVectorSize; ch++)
        {
            MFloat min = m_centroid[ch] + m_direction[ch] * m_minDist;
            MFloat max = m_centroid[ch] + m_direction[ch] * m_maxDist;

            float safeWeight = channelWeights[ch];
            if (safeWeight == 0.f)
                safeWeight = 1.0f;

            unweightedBase[ch] = min / safeWeight;
            unweightedOffset[ch] = (max - min) / safeWeight;
        }

        return UnfinishedEndpoints<TVectorSize>(unweightedBase, unweightedOffset);
    }

private:
    // Pass 0: accumulate the weighted sum of values and the total weight.
    void ContributeCentroid(const MFloat *value, const MFloat &weight)
    {
        for (int ch = 0; ch < TVectorSize; ch++)
            m_centroid[ch] = m_centroid[ch] + value[ch] * weight;
        m_weightTotal = m_weightTotal + weight;
    }

    // Pass 0 finalization: turn the weighted sum into a weighted mean.
    void FinishCentroid()
    {
        MFloat denom = m_weightTotal;
        ParallelMath::MakeSafeDenominator(denom);

        for (int ch = 0; ch < TVectorSize; ch++)
            m_centroid[ch] = m_centroid[ch] / denom;
    }

    // Pass 1: add the centroid-relative value to the covariance matrix.
    void ContributeDirection(const MFloat *value, const MFloat &weight)
    {
        MFloat diff[TVectorSize];
        for (int ch = 0; ch < TVectorSize; ch++)
            diff[ch] = value[ch] - m_centroid[ch];

        m_covarianceMatrix.Add(diff, weight);
    }

    // Pass 1 finalization: power iteration starting from the all-ones vector,
    // rescaling by the largest component after each step, then normalizing
    // the result to unit length.
    void FinishDirection()
    {
        MFloat approx[TVectorSize];
        for (int ch = 0; ch < TVectorSize; ch++)
            approx[ch] = ParallelMath::MakeFloat(1.0f);

        for (int i = 0; i < TIterationCount; i++)
        {
            MFloat product[TVectorSize];
            m_covarianceMatrix.Product(product, approx);

            MFloat largestComponent = product[0];
            for (int ch = 1; ch < TVectorSize; ch++)
                largestComponent = ParallelMath::Max(largestComponent, product[ch]);

            // product = largestComponent*newApprox
            ParallelMath::MakeSafeDenominator(largestComponent);
            for (int ch = 0; ch < TVectorSize; ch++)
                approx[ch] = product[ch] / largestComponent;
        }

        // Normalize
        MFloat approxLen = ParallelMath::MakeFloatZero();
        for (int ch = 0; ch < TVectorSize; ch++)
            approxLen = approxLen + approx[ch] * approx[ch];

        approxLen = ParallelMath::Sqrt(approxLen);

        ParallelMath::MakeSafeDenominator(approxLen);

        for (int ch = 0; ch < TVectorSize; ch++)
            m_direction[ch] = approx[ch] / approxLen;
    }

    // Pass 2: signed distance of the value along the direction, measured from
    // the centroid; widens the tracked [min, max] range.
    void ContributeMinMax(const MFloat *value)
    {
        MFloat dist = ParallelMath::MakeFloatZero();
        for (int ch = 0; ch < TVectorSize; ch++)
            dist = dist + m_direction[ch] * (value[ch] - m_centroid[ch]);

        m_minDist = ParallelMath::Min(m_minDist, dist);
        m_maxDist = ParallelMath::Max(m_maxDist, dist);
    }

    ParallelMath::Float m_centroid[TVectorSize];
    ParallelMath::Float m_direction[TVectorSize];
    PackedCovarianceMatrix<TVectorSize> m_covarianceMatrix;
    ParallelMath::Float m_weightTotal;

    ParallelMath::Float m_minDist;
    ParallelMath::Float m_maxDist;
};
}
}
#endif

View file

@ -0,0 +1,282 @@
#pragma once
#include <stdint.h>
// This file is generated by the MakeTables app. Do not edit this file manually.
namespace cvtt { namespace Tables { namespace FakeBT709 {
const uint8_t g_rounding16[] =
{
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6,
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6,
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6,
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6,
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6,
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6,
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 4, 4, 4, 4, 4,
1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6,
0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4,
0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4,
1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 5, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6,
0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 2, 2, 2, 2, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6, 6,
1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 2, 2, 2, 2, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6, 6,
3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7,
1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6, 6,
3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 7, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7,
1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6, 6,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7,
1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 6, 6, 6,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7,
1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7,
1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7,
1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
1, 1, 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7,
3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 7, 7, 7, 7, 7, 7,
};
}}}

View file

@ -0,0 +1,66 @@
/*
Convection Texture Tools
Copyright (c) 2018-2019 Eric Lasota
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject
to the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------------------
Portions based on DirectX Texture Library (DirectXTex)
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
http://go.microsoft.com/fwlink/?LinkId=248926
*/
#include "ConvectionKernels_Config.h"
#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)
#include "ConvectionKernels_IndexSelector.h"
namespace cvtt
{
namespace Internal
{
// Fixed-point reciprocal table indexed by an index selector's range (the
// number of distinct index values). Entry [r] holds round(32768 / (r - 1)),
// i.e. the reciprocal of the largest index for that range.
// The IndexSelector reconstruction routines multiply an entry by an index and
// right-shift the product (by 9 or by 7) to get an interpolation weight on a
// 0..64 or 0..256 scale.
// Entries [0] and [1] have a divisor <= 0, so no reciprocal exists; they are 0.
const ParallelMath::UInt16 g_weightReciprocals[17] =
{
ParallelMath::MakeUInt16(0), // [0]: divisor -1, no reciprocal
ParallelMath::MakeUInt16(0), // [1]: divisor 0, no reciprocal
ParallelMath::MakeUInt16(32768), // [2]: 32768 / 1
ParallelMath::MakeUInt16(16384), // [3]: 32768 / 2
ParallelMath::MakeUInt16(10923), // [4]: 32768 / 3 (rounded)
ParallelMath::MakeUInt16(8192), // [5]: 32768 / 4
ParallelMath::MakeUInt16(6554), // [6]: 32768 / 5 (rounded)
ParallelMath::MakeUInt16(5461), // [7]: 32768 / 6 (rounded)
ParallelMath::MakeUInt16(4681), // [8]: 32768 / 7 (rounded)
ParallelMath::MakeUInt16(4096), // [9]: 32768 / 8
ParallelMath::MakeUInt16(3641), // [10]: 32768 / 9 (rounded)
ParallelMath::MakeUInt16(3277), // [11]: 32768 / 10 (rounded)
ParallelMath::MakeUInt16(2979), // [12]: 32768 / 11 (rounded)
ParallelMath::MakeUInt16(2731), // [13]: 32768 / 12 (rounded)
ParallelMath::MakeUInt16(2521), // [14]: 32768 / 13 (rounded)
ParallelMath::MakeUInt16(2341), // [15]: 32768 / 14 (rounded)
ParallelMath::MakeUInt16(2185), // [16]: 32768 / 15 (rounded)
};
}
}
#endif

View file

@ -0,0 +1,147 @@
#pragma once
#ifndef __CVTT_INDEXSELECTOR_H__
#define __CVTT_INDEXSELECTOR_H__
#include "ConvectionKernels_ParallelMath.h"
namespace cvtt
{
namespace Internal
{
extern const ParallelMath::UInt16 g_weightReciprocals[17];
template<int TVectorSize>
class IndexSelector
{
public:
typedef ParallelMath::Float MFloat;
typedef ParallelMath::UInt16 MUInt16;
typedef ParallelMath::UInt15 MUInt15;
typedef ParallelMath::SInt16 MSInt16;
typedef ParallelMath::AInt16 MAInt16;
typedef ParallelMath::SInt32 MSInt32;
typedef ParallelMath::UInt31 MUInt31;
template<class TInterpolationEPType, class TColorEPType>
void Init(const float *channelWeights, const TInterpolationEPType interpolationEndPoints[2][TVectorSize], const TColorEPType colorSpaceEndpoints[2][TVectorSize], int range)
{
// In BC6H, the interpolation endpoints are higher-precision than the endpoints in color space.
// We need to select indexes using the color-space endpoints.
m_isUniform = true;
for (int ch = 1; ch < TVectorSize; ch++)
{
if (channelWeights[ch] != channelWeights[0])
m_isUniform = false;
}
// To work with channel weights, we need something where:
// pxDiff = px - ep[0]
// epDiff = ep[1] - ep[0]
//
// weightedEPDiff = epDiff * channelWeights
// normalizedWeightedAxis = weightedEPDiff / len(weightedEPDiff)
// normalizedIndex = dot(pxDiff * channelWeights, normalizedWeightedAxis) / len(weightedEPDiff)
// index = normalizedIndex * maxValue
//
// Equivalent to:
// axis = channelWeights * maxValue * epDiff * channelWeights / lenSquared(epDiff * channelWeights)
// index = dot(axis, pxDiff)
for (int ep = 0; ep < 2; ep++)
for (int ch = 0; ch < TVectorSize; ch++)
m_endPoint[ep][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(interpolationEndPoints[ep][ch]);
m_range = range;
m_maxValue = static_cast<float>(range - 1);
MFloat epDiffWeighted[TVectorSize];
for (int ch = 0; ch < TVectorSize; ch++)
{
m_origin[ch] = ParallelMath::ToFloat(colorSpaceEndpoints[0][ch]);
MFloat opposingOriginCh = ParallelMath::ToFloat(colorSpaceEndpoints[1][ch]);
epDiffWeighted[ch] = (opposingOriginCh - m_origin[ch]) * channelWeights[ch];
}
MFloat lenSquared = epDiffWeighted[0] * epDiffWeighted[0];
for (int ch = 1; ch < TVectorSize; ch++)
lenSquared = lenSquared + epDiffWeighted[ch] * epDiffWeighted[ch];
ParallelMath::MakeSafeDenominator(lenSquared);
MFloat maxValueDividedByLengthSquared = ParallelMath::MakeFloat(m_maxValue) / lenSquared;
for (int ch = 0; ch < TVectorSize; ch++)
m_axis[ch] = epDiffWeighted[ch] * channelWeights[ch] * maxValueDividedByLengthSquared;
}
template<bool TSigned>
void Init(const float channelWeights[TVectorSize], const MUInt15 endPoints[2][TVectorSize], int range)
{
MAInt16 converted[2][TVectorSize];
for (int epi = 0; epi < 2; epi++)
for (int ch = 0; ch < TVectorSize; ch++)
converted[epi][ch] = ParallelMath::LosslessCast<MAInt16>::Cast(endPoints[epi][ch]);
Init<MUInt15, MUInt15>(channelWeights, endPoints, endPoints, range);
}
void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel, int numRealChannels)
{
MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9));
for (int ch = 0; ch < numRealChannels; ch++)
{
MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(64) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch])));
MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch])));
pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(32), 6));
}
}
void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel, int numRealChannels)
{
MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 64, 7));
for (int ch = 0; ch < numRealChannels; ch++)
{
MUInt15 ep0f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply((ParallelMath::MakeUInt15(256) - weight), ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[0][ch])));
MUInt15 ep1f = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::CompactMultiply(weight, ParallelMath::LosslessCast<MUInt15>::Cast(m_endPoint[1][ch])));
pixel[ch] = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ep0f + ep1f + ParallelMath::MakeUInt15(128), 8));
}
}
void ReconstructLDR_BC7(const MUInt15 &index, MUInt15* pixel)
{
ReconstructLDR_BC7(index, pixel, TVectorSize);
}
void ReconstructLDRPrecise(const MUInt15 &index, MUInt15* pixel)
{
ReconstructLDRPrecise(index, pixel, TVectorSize);
}
MUInt15 SelectIndexLDR(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const
{
MFloat dist = (pixel[0] - m_origin[0]) * m_axis[0];
for (int ch = 1; ch < TVectorSize; ch++)
dist = dist + (pixel[ch] - m_origin[ch]) * m_axis[ch];
return ParallelMath::RoundAndConvertToU15(ParallelMath::Clamp(dist, 0.0f, m_maxValue), rtn);
}
protected:
MAInt16 m_endPoint[2][TVectorSize];
private:
MFloat m_origin[TVectorSize];
MFloat m_axis[TVectorSize];
int m_range;
float m_maxValue;
bool m_isUniform;
};
}
}
#endif

View file

@ -0,0 +1,155 @@
#pragma once
#ifndef __CVTT_INDEXSELECTORHDR_H__
#define __CVTT_INDEXSELECTORHDR_H__
#include "ConvectionKernels_ParallelMath.h"
#include "ConvectionKernels_IndexSelector.h"
namespace cvtt
{
namespace Internal
{
// Only declared in this header. IndexSelectorHDR applies these to each blended
// endpoint value (after its rounding right-shift by 6) before storing the
// result as a reconstructed pixel component.
// NOTE(review): presumably the final BC6H unquantization scale step - confirm
// against the definitions.
ParallelMath::SInt16 UnscaleHDRValueSigned(const ParallelMath::SInt16 &v);
ParallelMath::UInt15 UnscaleHDRValueUnsigned(const ParallelMath::UInt16 &v);
// HDR flavour of IndexSelector. On top of the base class it can
//   - reconstruct signed / unsigned HDR pixel values for an index,
//   - optionally treat indexes as reversed on a per-lane basis (ConditionalInvert),
//   - pick indexes either by projection (fast) or by testing every
//     reconstructed interpolator (slow).
//
// The Reconstruct* members read the endpoints stored by the base class, so the
// base Init is expected to have run before InitHDR.
// NOTE(review): InitHDR keeps its own copy of `range`, separate from the base
// class's; callers presumably pass the same value to both - confirm.
template<int TVectorSize>
class IndexSelectorHDR : public IndexSelector<TVectorSize>
{
public:
    typedef ParallelMath::Float MFloat;
    typedef ParallelMath::SInt16 MSInt16;
    typedef ParallelMath::SInt32 MSInt32;
    typedef ParallelMath::UInt15 MUInt15;
    typedef ParallelMath::UInt16 MUInt16;
    typedef ParallelMath::UInt31 MUInt31;

private:
    // Returns (range - 1) - anIndex in the lanes flagged by m_isInverted and
    // anIndex unchanged in the others.
    MUInt15 InvertSingle(const MUInt15& anIndex) const
    {
        MUInt15 flipped = m_maxValueMinusOne - anIndex;
        return ParallelMath::Select(m_isInverted, flipped, anIndex);
    }

    // Signed reconstruction without applying the inversion flag:
    // blends the endpoints with a 0..64 weight, rounds, then unscales.
    void ReconstructHDRSignedUninverted(const MUInt15 &index, MSInt16* pixel) const
    {
        MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9));

        for (int ch = 0; ch != TVectorSize; ++ch)
        {
            MSInt16 nearEP = ParallelMath::LosslessCast<MSInt16>::Cast(this->m_endPoint[0][ch]);
            MSInt16 farEP = ParallelMath::LosslessCast<MSInt16>::Cast(this->m_endPoint[1][ch]);

            MSInt32 blended = ParallelMath::XMultiply((ParallelMath::MakeUInt15(64) - weight), nearEP) + ParallelMath::XMultiply(weight, farEP);
            blended = ParallelMath::RightShift(blended + ParallelMath::MakeSInt32(32), 6);

            pixel[ch] = UnscaleHDRValueSigned(ParallelMath::ToSInt16(blended));
        }
    }

    // Unsigned counterpart of ReconstructHDRSignedUninverted.
    void ReconstructHDRUnsignedUninverted(const MUInt15 &index, MSInt16* pixel) const
    {
        MUInt15 weight = ParallelMath::LosslessCast<MUInt15>::Cast(ParallelMath::RightShift(ParallelMath::CompactMultiply(g_weightReciprocals[m_range], index) + 256, 9));

        for (int ch = 0; ch != TVectorSize; ++ch)
        {
            MUInt16 nearEP = ParallelMath::LosslessCast<MUInt16>::Cast(this->m_endPoint[0][ch]);
            MUInt16 farEP = ParallelMath::LosslessCast<MUInt16>::Cast(this->m_endPoint[1][ch]);

            MUInt31 blended = ParallelMath::XMultiply((ParallelMath::MakeUInt15(64) - weight), nearEP) + ParallelMath::XMultiply(weight, farEP);
            blended = ParallelMath::RightShift(blended + ParallelMath::MakeUInt31(32), 6);

            pixel[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(UnscaleHDRValueUnsigned(ParallelMath::ToUInt16(blended)));
        }
    }

    // Squared difference between one pixel channel and the cached interpolator.
    MFloat ErrorForInterpolatorComponent(int index, int ch, const MFloat *pixel) const
    {
        MFloat delta = pixel[ch] - m_reconstructedInterpolators[index][ch];
        return delta * delta;
    }

    // Sum of the per-channel squared differences for interpolator `index`.
    MFloat ErrorForInterpolator(int index, const MFloat *pixel) const
    {
        MFloat total = ErrorForInterpolatorComponent(index, 0, pixel);
        for (int ch = 1; ch < TVectorSize; ++ch)
            total = total + ErrorForInterpolatorComponent(index, ch, pixel);
        return total;
    }

public:
    // Sets up the HDR-specific state.
    //   range          - number of index values (at most 16).
    //   isSigned       - selects signed vs. unsigned reconstruction for the cache.
    //   fastIndexing   - when true the interpolator cache is NOT filled, so
    //                    SelectIndexHDRSlow must not be used afterwards.
    //   channelWeights - multiplied into each cached interpolator channel.
    void InitHDR(int range, bool isSigned, bool fastIndexing, const float *channelWeights)
    {
        assert(range <= 16);

        m_range = range;
        m_isInverted = ParallelMath::MakeBoolInt16(false);
        m_maxValueMinusOne = ParallelMath::MakeUInt15(static_cast<uint16_t>(range - 1));

        // The cache is only needed by the exhaustive (slow) index search.
        if (fastIndexing)
            return;

        for (int step = 0; step < range; ++step)
        {
            MSInt16 halfBits[TVectorSize];

            if (isSigned)
                ReconstructHDRSignedUninverted(ParallelMath::MakeUInt15(static_cast<uint16_t>(step)), halfBits);
            else
                ReconstructHDRUnsignedUninverted(ParallelMath::MakeUInt15(static_cast<uint16_t>(step)), halfBits);

            for (int ch = 0; ch < TVectorSize; ++ch)
                m_reconstructedInterpolators[step][ch] = ParallelMath::TwosCLHalfToFloat(halfBits[ch]) * channelWeights[ch];
        }
    }

    // Signed reconstruction honouring the inversion flag.
    void ReconstructHDRSigned(const MUInt15 &index, MSInt16* pixel) const
    {
        ReconstructHDRSignedUninverted(InvertSingle(index), pixel);
    }

    // Unsigned reconstruction honouring the inversion flag.
    void ReconstructHDRUnsigned(const MUInt15 &index, MSInt16* pixel) const
    {
        ReconstructHDRUnsignedUninverted(InvertSingle(index), pixel);
    }

    // Replaces the per-lane inversion flag consulted by InvertSingle.
    void ConditionalInvert(const ParallelMath::Int16CompFlag &invert)
    {
        m_isInverted = invert;
    }

    // Exhaustive search: returns the index whose cached interpolator has the
    // smallest squared error against `pixel` (first one wins on ties), mapped
    // through the inversion flag. Requires InitHDR with fastIndexing == false.
    // NOTE(review): the cache is pre-multiplied by channelWeights, so `pixel`
    // is presumably expected to be pre-weighted as well - confirm with callers.
    MUInt15 SelectIndexHDRSlow(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope*) const
    {
        MUInt15 chosen = ParallelMath::MakeUInt15(0);
        MFloat lowestError = ErrorForInterpolator(0, pixel);

        for (int candidate = 1; candidate < m_range; ++candidate)
        {
            MFloat candidateError = ErrorForInterpolator(candidate, pixel);
            ParallelMath::FloatCompFlag isImprovement = ParallelMath::Less(candidateError, lowestError);
            ParallelMath::ConditionalSet(chosen, ParallelMath::FloatFlagToInt16(isImprovement), ParallelMath::MakeUInt15(static_cast<uint16_t>(candidate)));
            lowestError = ParallelMath::Min(lowestError, candidateError);
        }

        return InvertSingle(chosen);
    }

    // Projection-based search: the base class's SelectIndexLDR result mapped
    // through the inversion flag.
    MUInt15 SelectIndexHDRFast(const MFloat* pixel, const ParallelMath::RoundTowardNearestForScope* rtn) const
    {
        return InvertSingle(this->SelectIndexLDR(pixel, rtn));
    }

private:
    MFloat m_reconstructedInterpolators[16][TVectorSize];   // filled by InitHDR when !fastIndexing
    ParallelMath::Int16CompFlag m_isInverted;               // per-lane "indexes are reversed" flag
    MUInt15 m_maxValueMinusOne;                             // holds range - 1 (the largest index)
    int m_range;                                            // number of index values
};
}
}
#endif

View file

@ -0,0 +1,68 @@
#pragma once
#ifndef __CVTT_COVARIANCEMATRIX_H__
#define __CVTT_COVARIANCEMATRIX_H__
namespace cvtt
{
namespace Internal
{
template<int TMatrixSize>
class PackedCovarianceMatrix
{
public:
// 0: xx,
// 1: xy, yy
// 3: xz, yz, zz
// 6: xw, yw, zw, ww
// ... etc.
static const int PyramidSize = (TMatrixSize * (TMatrixSize + 1)) / 2;
typedef ParallelMath::Float MFloat;
PackedCovarianceMatrix()
{
for (int i = 0; i < PyramidSize; i++)
m_values[i] = ParallelMath::MakeFloatZero();
}
void Add(const ParallelMath::Float *vec, const ParallelMath::Float &weight)
{
int index = 0;
for (int row = 0; row < TMatrixSize; row++)
{
for (int col = 0; col <= row; col++)
{
m_values[index] = m_values[index] + vec[row] * vec[col] * weight;
index++;
}
}
}
void Product(MFloat *outVec, const MFloat *inVec)
{
for (int row = 0; row < TMatrixSize; row++)
{
MFloat sum = ParallelMath::MakeFloatZero();
int index = (row * (row + 1)) >> 1;
for (int col = 0; col < TMatrixSize; col++)
{
sum = sum + inVec[col] * m_values[index];
if (col >= row)
index += col + 1;
else
index++;
}
outVec[row] = sum;
}
}
private:
ParallelMath::Float m_values[PyramidSize];
};
}
}
#endif

File diff suppressed because it is too large Load diff

1054
thirdparty/cvtt/ConvectionKernels_S3TC.cpp vendored Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,51 @@
#pragma once
#ifndef __CVTT_S3TC_H__
#define __CVTT_S3TC_H__
#include "ConvectionKernels_ParallelMath.h"
namespace cvtt
{
namespace Internal
{
template<int TVectorSize>
class EndpointRefiner;
}
struct PixelBlockU8;
}
namespace cvtt
{
namespace Internal
{
// Static-only collection of routines for the S3TC block packers. Only the
// declarations are visible in this header.
// NOTE(review): the per-function notes below are inferred from names and
// signatures alone - confirm each against its definition.
class S3TCComputer
{
public:
typedef ParallelMath::Float MFloat;
typedef ParallelMath::SInt16 MSInt16;
typedef ParallelMath::UInt15 MUInt15;
typedef ParallelMath::UInt16 MUInt16;
typedef ParallelMath::SInt32 MSInt32;
// Presumably resets the running "best error" value that the Test* routines update.
static void Init(MFloat& error);
// In-place quantization helpers; presumably snap a channel value (or an RGB
// endpoint, for QuantizeTo565) to what the 6-/5-bit encoding can represent.
static void QuantizeTo6Bits(MUInt15& v);
static void QuantizeTo5Bits(MUInt15& v);
static void QuantizeTo565(MUInt15 endPoint[3]);
// "Paranoid" error helpers; semantics are not visible from the declarations.
static MFloat ParanoidFactorForSpan(const MSInt16& span);
static MFloat ParanoidDiff(const MUInt15& a, const MUInt15& b, const MFloat& d);
// Candidate evaluators. Each takes bestError / bestEndpoints / bestIndexes /
// bestRange by non-const reference or pointer; presumably they are overwritten
// when the candidate under test beats the current best.
static void TestSingleColor(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], int range, const float* channelWeights,
MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, const ParallelMath::RoundTowardNearestForScope *rtn);
static void TestEndpoints(uint32_t flags, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const MFloat preWeightedPixels[16][4], const MUInt15 unquantizedEndPoints[2][3], int range, const float* channelWeights,
MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange, EndpointRefiner<3> *refiner, const ParallelMath::RoundTowardNearestForScope *rtn);
static void TestCounts(uint32_t flags, const int *counts, int nCounts, const MUInt15 &numElements, const MUInt15 pixels[16][4], const MFloat floatPixels[16][4], const MFloat preWeightedPixels[16][4], bool alphaTest,
const MFloat floatSortedInputs[16][4], const MFloat preWeightedFloatSortedInputs[16][4], const float *channelWeights, MFloat &bestError, MUInt15 bestEndpoints[2][3], MUInt15 bestIndexes[16], MUInt15 &bestRange,
const ParallelMath::RoundTowardNearestForScope* rtn);
// Packing entry points: take input pixel blocks plus an output byte buffer
// with a stride. Presumably one packed block is written per input block,
// packedBlockStride bytes apart - TODO confirm.
static void PackExplicitAlpha(uint32_t flags, const PixelBlockU8* inputs, int inputChannel, uint8_t* packedBlocks, size_t packedBlockStride);
static void PackInterpolatedAlpha(uint32_t flags, const PixelBlockU8* inputs, int inputChannel, uint8_t* packedBlocks, size_t packedBlockStride, bool isSigned, int maxTweakRounds, int numRefineRounds);
static void PackRGB(uint32_t flags, const PixelBlockU8* inputs, uint8_t* packedBlocks, size_t packedBlockStride, const float channelWeights[4], bool alphaTest, float alphaThreshold, bool exhaustive, int maxTweakRounds, int numRefineRounds);
};
}
}
#endif

View file

@ -0,0 +1,304 @@
#pragma once
#include <stdint.h>
// This file is generated by the MakeTables app. Do not edit this file manually.
namespace cvtt { namespace Tables { namespace S3TCSC {
// One row of a single-color lookup table: a pair of 8-bit endpoint values plus
// the value that pair produces and the distance between the endpoints.
// Member order matters: the tables in this file aggregate-initialize entries
// positionally as { m_min, m_max, m_actualColor, m_span }.
struct TableEntry
{
    // First endpoint value. Despite the name it can be larger than m_max
    // (e.g. the entry { 8, 0, 5, 8 } in g_singleColor5_3).
    uint8_t m_min;

    // Second endpoint value.
    uint8_t m_max;

    // Value obtained from the endpoint pair. In the "_3" tables of this file it
    // matches floor((2 * m_min + m_max) / 3); in the "_2" tables it matches
    // floor((m_min + m_max) / 2).
    uint8_t m_actualColor;

    // Absolute difference between m_min and m_max in the table data.
    uint8_t m_span;
};
// Single-color lookup table with 256 entries, presumably indexed by the 8-bit
// target channel value (each entry's m_actualColor is at or next to its index).
// Endpoint values here are all drawn from 0, 8, 16, 24, 33, 41, ... 255, which is
// consistent with 5-bit endpoints expanded to 8 bits; m_actualColor matches
// floor((2 * m_min + m_max) / 3), i.e. the point one third of the way from m_min
// to m_max. Largest m_span in this table is 33.
// NOTE(review): this is a mutable, non-static definition inside a header; it is
// only safe if exactly one translation unit includes the header - confirm.
TableEntry g_singleColor5_3[256] =
{
{ 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 8, 2, 8 }, { 0, 8, 2, 8 }, { 8, 0, 5, 8 }, { 8, 0, 5, 8 }, { 8, 0, 5, 8 }, { 8, 8, 8, 0 },
{ 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 16, 10, 8 }, { 0, 33, 11, 33 }, { 16, 8, 13, 8 }, { 16, 8, 13, 8 }, { 16, 8, 13, 8 }, { 16, 16, 16, 0 },
{ 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 24, 18, 8 }, { 8, 41, 19, 33 }, { 24, 16, 21, 8 }, { 24, 16, 21, 8 }, { 33, 0, 22, 33 }, { 24, 24, 24, 0 },
{ 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 33, 27, 9 }, { 24, 33, 27, 9 }, { 24, 33, 27, 9 }, { 24, 41, 29, 17 }, { 33, 24, 30, 9 }, { 33, 24, 30, 9 },
{ 24, 49, 32, 25 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 33, 41, 35, 8 }, { 33, 41, 35, 8 }, { 41, 33, 38, 8 }, { 41, 33, 38, 8 }, { 41, 33, 38, 8 },
{ 49, 24, 40, 25 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 49, 43, 8 }, { 33, 66, 44, 33 }, { 49, 41, 46, 8 }, { 49, 41, 46, 8 }, { 49, 41, 46, 8 },
{ 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 57, 51, 8 }, { 41, 74, 52, 33 }, { 57, 49, 54, 8 }, { 57, 49, 54, 8 }, { 66, 33, 55, 33 },
{ 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 66, 60, 9 }, { 57, 66, 60, 9 }, { 57, 66, 60, 9 }, { 57, 74, 62, 17 }, { 66, 57, 63, 9 },
{ 66, 57, 63, 9 }, { 57, 82, 65, 25 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 66, 74, 68, 8 }, { 66, 74, 68, 8 }, { 74, 66, 71, 8 }, { 74, 66, 71, 8 },
{ 74, 66, 71, 8 }, { 82, 57, 73, 25 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 82, 76, 8 }, { 66, 99, 77, 33 }, { 82, 74, 79, 8 }, { 82, 74, 79, 8 },
{ 82, 74, 79, 8 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 90, 84, 8 }, { 74, 107, 85, 33 }, { 90, 82, 87, 8 }, { 90, 82, 87, 8 },
{ 99, 66, 88, 33 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 99, 93, 9 }, { 90, 99, 93, 9 }, { 90, 99, 93, 9 }, { 90, 107, 95, 17 },
{ 99, 90, 96, 9 }, { 99, 90, 96, 9 }, { 90, 115, 98, 25 }, { 99, 99, 99, 0 }, { 99, 99, 99, 0 }, { 99, 107, 101, 8 }, { 99, 107, 101, 8 }, { 107, 99, 104, 8 },
{ 107, 99, 104, 8 }, { 107, 99, 104, 8 }, { 115, 90, 106, 25 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 115, 109, 8 }, { 99, 132, 110, 33 }, { 115, 107, 112, 8 },
{ 115, 107, 112, 8 }, { 115, 107, 112, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 123, 117, 8 }, { 107, 140, 118, 33 }, { 123, 115, 120, 8 },
{ 123, 115, 120, 8 }, { 132, 99, 121, 33 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 132, 126, 9 }, { 123, 132, 126, 9 }, { 123, 132, 126, 9 },
{ 123, 140, 128, 17 }, { 132, 123, 129, 9 }, { 132, 123, 129, 9 }, { 123, 148, 131, 25 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 132, 140, 134, 8 }, { 132, 140, 134, 8 },
{ 140, 132, 137, 8 }, { 140, 132, 137, 8 }, { 140, 132, 137, 8 }, { 148, 123, 139, 25 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 148, 142, 8 }, { 132, 165, 143, 33 },
{ 148, 140, 145, 8 }, { 148, 140, 145, 8 }, { 148, 140, 145, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 156, 150, 8 }, { 140, 173, 151, 33 },
{ 156, 148, 153, 8 }, { 156, 148, 153, 8 }, { 165, 132, 154, 33 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 165, 159, 9 }, { 156, 165, 159, 9 },
{ 156, 165, 159, 9 }, { 156, 173, 161, 17 }, { 165, 156, 162, 9 }, { 165, 156, 162, 9 }, { 156, 181, 164, 25 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 165, 173, 167, 8 },
{ 165, 173, 167, 8 }, { 173, 165, 170, 8 }, { 173, 165, 170, 8 }, { 173, 165, 170, 8 }, { 181, 156, 172, 25 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 173, 181, 175, 8 },
{ 165, 198, 176, 33 }, { 181, 173, 178, 8 }, { 181, 173, 178, 8 }, { 181, 173, 178, 8 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 189, 183, 8 },
{ 173, 206, 184, 33 }, { 189, 181, 186, 8 }, { 189, 181, 186, 8 }, { 198, 165, 187, 33 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 198, 192, 9 },
{ 189, 198, 192, 9 }, { 189, 198, 192, 9 }, { 189, 206, 194, 17 }, { 198, 189, 195, 9 }, { 198, 189, 195, 9 }, { 189, 214, 197, 25 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 },
{ 198, 206, 200, 8 }, { 198, 206, 200, 8 }, { 206, 198, 203, 8 }, { 206, 198, 203, 8 }, { 206, 198, 203, 8 }, { 214, 189, 205, 25 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 },
{ 206, 214, 208, 8 }, { 198, 231, 209, 33 }, { 214, 206, 211, 8 }, { 214, 206, 211, 8 }, { 214, 206, 211, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 },
{ 214, 222, 216, 8 }, { 206, 239, 217, 33 }, { 222, 214, 219, 8 }, { 222, 214, 219, 8 }, { 231, 198, 220, 33 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 },
{ 222, 231, 225, 9 }, { 222, 231, 225, 9 }, { 222, 231, 225, 9 }, { 222, 239, 227, 17 }, { 231, 222, 228, 9 }, { 231, 222, 228, 9 }, { 222, 247, 230, 25 }, { 231, 231, 231, 0 },
{ 231, 231, 231, 0 }, { 231, 239, 233, 8 }, { 231, 239, 233, 8 }, { 239, 231, 236, 8 }, { 239, 231, 236, 8 }, { 239, 231, 236, 8 }, { 247, 222, 238, 25 }, { 239, 239, 239, 0 },
{ 239, 239, 239, 0 }, { 239, 247, 241, 8 }, { 239, 247, 241, 8 }, { 247, 239, 244, 8 }, { 247, 239, 244, 8 }, { 247, 239, 244, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
{ 247, 247, 247, 0 }, { 247, 255, 249, 8 }, { 247, 255, 249, 8 }, { 255, 247, 252, 8 }, { 255, 247, 252, 8 }, { 255, 247, 252, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
};
// Single-color lookup table with 256 entries, presumably indexed by the 8-bit
// target channel value (each entry's m_actualColor is at or next to its index).
// Endpoint values here are all drawn from 0, 4, 8, ... 60, 65, 69, ... 255, which
// is consistent with 6-bit endpoints expanded to 8 bits; m_actualColor matches
// floor((2 * m_min + m_max) / 3). Spans in this table go up to 69.
// NOTE(review): this is a mutable, non-static definition inside a header; it is
// only safe if exactly one translation unit includes the header - confirm.
TableEntry g_singleColor6_3[256] =
{
{ 0, 0, 0, 0 }, { 0, 4, 1, 4 }, { 4, 0, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 8, 5, 4 }, { 8, 4, 6, 4 }, { 8, 8, 8, 0 },
{ 8, 8, 8, 0 }, { 8, 12, 9, 4 }, { 12, 8, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 16, 13, 4 }, { 16, 12, 14, 4 }, { 16, 16, 16, 0 },
{ 16, 16, 16, 0 }, { 16, 20, 17, 4 }, { 20, 16, 18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 24, 21, 4 }, { 24, 20, 22, 4 }, { 0, 69, 23, 69 },
{ 24, 24, 24, 0 }, { 24, 28, 25, 4 }, { 28, 24, 26, 4 }, { 8, 65, 27, 57 }, { 28, 28, 28, 0 }, { 28, 32, 29, 4 }, { 32, 28, 30, 4 }, { 12, 69, 31, 57 },
{ 32, 32, 32, 0 }, { 32, 36, 33, 4 }, { 36, 32, 34, 4 }, { 20, 65, 35, 45 }, { 36, 36, 36, 0 }, { 36, 40, 37, 4 }, { 40, 36, 38, 4 }, { 24, 69, 39, 45 },
{ 40, 40, 40, 0 }, { 40, 44, 41, 4 }, { 44, 40, 42, 4 }, { 32, 65, 43, 33 }, { 44, 44, 44, 0 }, { 44, 48, 45, 4 }, { 48, 44, 46, 4 }, { 36, 69, 47, 33 },
{ 48, 48, 48, 0 }, { 48, 52, 49, 4 }, { 52, 48, 50, 4 }, { 44, 65, 51, 21 }, { 52, 52, 52, 0 }, { 52, 56, 53, 4 }, { 56, 52, 54, 4 }, { 48, 69, 55, 21 },
{ 56, 56, 56, 0 }, { 56, 60, 57, 4 }, { 60, 56, 58, 4 }, { 56, 65, 59, 9 }, { 60, 60, 60, 0 }, { 60, 65, 61, 5 }, { 65, 56, 62, 9 }, { 65, 60, 63, 5 },
{ 60, 73, 64, 13 }, { 65, 65, 65, 0 }, { 65, 69, 66, 4 }, { 69, 65, 67, 4 }, { 73, 60, 68, 13 }, { 69, 69, 69, 0 }, { 69, 73, 70, 4 }, { 73, 69, 71, 4 },
{ 81, 56, 72, 25 }, { 73, 73, 73, 0 }, { 73, 77, 74, 4 }, { 77, 73, 75, 4 }, { 85, 60, 76, 25 }, { 77, 77, 77, 0 }, { 77, 81, 78, 4 }, { 81, 77, 79, 4 },
{ 93, 56, 80, 37 }, { 81, 81, 81, 0 }, { 81, 85, 82, 4 }, { 85, 81, 83, 4 }, { 97, 60, 84, 37 }, { 85, 85, 85, 0 }, { 85, 89, 86, 4 }, { 89, 85, 87, 4 },
{ 105, 56, 88, 49 }, { 89, 89, 89, 0 }, { 89, 93, 90, 4 }, { 93, 89, 91, 4 }, { 109, 60, 92, 49 }, { 93, 93, 93, 0 }, { 93, 97, 94, 4 }, { 97, 93, 95, 4 },
{ 77, 134, 96, 57 }, { 97, 97, 97, 0 }, { 97, 101, 98, 4 }, { 101, 97, 99, 4 }, { 85, 130, 100, 45 }, { 101, 101, 101, 0 }, { 101, 105, 102, 4 }, { 105, 101, 103, 4 },
{ 89, 134, 104, 45 }, { 105, 105, 105, 0 }, { 105, 109, 106, 4 }, { 109, 105, 107, 4 }, { 97, 130, 108, 33 }, { 109, 109, 109, 0 }, { 109, 113, 110, 4 }, { 113, 109, 111, 4 },
{ 101, 134, 112, 33 }, { 113, 113, 113, 0 }, { 113, 117, 114, 4 }, { 117, 113, 115, 4 }, { 109, 130, 116, 21 }, { 117, 117, 117, 0 }, { 117, 121, 118, 4 }, { 121, 117, 119, 4 },
{ 113, 134, 120, 21 }, { 121, 121, 121, 0 }, { 121, 125, 122, 4 }, { 125, 121, 123, 4 }, { 121, 130, 124, 9 }, { 125, 125, 125, 0 }, { 125, 130, 126, 5 }, { 130, 121, 127, 9 },
{ 130, 125, 128, 5 }, { 125, 138, 129, 13 }, { 130, 130, 130, 0 }, { 130, 134, 131, 4 }, { 134, 130, 132, 4 }, { 138, 125, 133, 13 }, { 134, 134, 134, 0 }, { 134, 138, 135, 4 },
{ 138, 134, 136, 4 }, { 146, 121, 137, 25 }, { 138, 138, 138, 0 }, { 138, 142, 139, 4 }, { 142, 138, 140, 4 }, { 150, 125, 141, 25 }, { 142, 142, 142, 0 }, { 142, 146, 143, 4 },
{ 146, 142, 144, 4 }, { 158, 121, 145, 37 }, { 146, 146, 146, 0 }, { 146, 150, 147, 4 }, { 150, 146, 148, 4 }, { 162, 125, 149, 37 }, { 150, 150, 150, 0 }, { 150, 154, 151, 4 },
{ 154, 150, 152, 4 }, { 170, 121, 153, 49 }, { 154, 154, 154, 0 }, { 154, 158, 155, 4 }, { 158, 154, 156, 4 }, { 174, 125, 157, 49 }, { 158, 158, 158, 0 }, { 158, 162, 159, 4 },
{ 162, 158, 160, 4 }, { 142, 199, 161, 57 }, { 162, 162, 162, 0 }, { 162, 166, 163, 4 }, { 166, 162, 164, 4 }, { 150, 195, 165, 45 }, { 166, 166, 166, 0 }, { 166, 170, 167, 4 },
{ 170, 166, 168, 4 }, { 154, 199, 169, 45 }, { 170, 170, 170, 0 }, { 170, 174, 171, 4 }, { 174, 170, 172, 4 }, { 162, 195, 173, 33 }, { 174, 174, 174, 0 }, { 174, 178, 175, 4 },
{ 178, 174, 176, 4 }, { 166, 199, 177, 33 }, { 178, 178, 178, 0 }, { 178, 182, 179, 4 }, { 182, 178, 180, 4 }, { 174, 195, 181, 21 }, { 182, 182, 182, 0 }, { 182, 186, 183, 4 },
{ 186, 182, 184, 4 }, { 178, 199, 185, 21 }, { 186, 186, 186, 0 }, { 186, 190, 187, 4 }, { 190, 186, 188, 4 }, { 186, 195, 189, 9 }, { 190, 190, 190, 0 }, { 190, 195, 191, 5 },
{ 195, 186, 192, 9 }, { 195, 190, 193, 5 }, { 190, 203, 194, 13 }, { 195, 195, 195, 0 }, { 195, 199, 196, 4 }, { 199, 195, 197, 4 }, { 203, 190, 198, 13 }, { 199, 199, 199, 0 },
{ 199, 203, 200, 4 }, { 203, 199, 201, 4 }, { 211, 186, 202, 25 }, { 203, 203, 203, 0 }, { 203, 207, 204, 4 }, { 207, 203, 205, 4 }, { 215, 190, 206, 25 }, { 207, 207, 207, 0 },
{ 207, 211, 208, 4 }, { 211, 207, 209, 4 }, { 223, 186, 210, 37 }, { 211, 211, 211, 0 }, { 211, 215, 212, 4 }, { 215, 211, 213, 4 }, { 227, 190, 214, 37 }, { 215, 215, 215, 0 },
{ 215, 219, 216, 4 }, { 219, 215, 217, 4 }, { 235, 186, 218, 49 }, { 219, 219, 219, 0 }, { 219, 223, 220, 4 }, { 223, 219, 221, 4 }, { 239, 190, 222, 49 }, { 223, 223, 223, 0 },
{ 223, 227, 224, 4 }, { 227, 223, 225, 4 }, { 247, 186, 226, 61 }, { 227, 227, 227, 0 }, { 227, 231, 228, 4 }, { 231, 227, 229, 4 }, { 251, 190, 230, 61 }, { 231, 231, 231, 0 },
{ 231, 235, 232, 4 }, { 235, 231, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 239, 236, 4 }, { 239, 235, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 },
{ 239, 243, 240, 4 }, { 243, 239, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 247, 244, 4 }, { 247, 243, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
{ 247, 251, 248, 4 }, { 251, 247, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 255, 252, 4 }, { 255, 251, 253, 4 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
};
// Single-color lookup table with 256 entries, presumably indexed by the 8-bit
// target channel value (each entry's m_actualColor is within a few units of its
// index). Endpoint values here are all drawn from 0, 8, 16, 24, 33, 41, ... 255,
// which is consistent with 5-bit endpoints expanded to 8 bits; m_actualColor
// matches floor((m_min + m_max) / 2), i.e. the midpoint of the pair. Every entry
// has m_min <= m_max and the largest m_span is 33.
// NOTE(review): this is a mutable, non-static definition inside a header; it is
// only safe if exactly one translation unit includes the header - confirm.
TableEntry g_singleColor5_2[256] =
{
{ 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 },
{ 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 },
{ 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 },
{ 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 41, 32, 17 },
{ 24, 41, 32, 17 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 24, 49, 36, 25 }, { 24, 49, 36, 25 }, { 33, 41, 37, 8 }, { 33, 41, 37, 8 }, { 24, 57, 40, 33 },
{ 24, 57, 40, 33 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 49, 45, 8 }, { 41, 49, 45, 8 }, { 41, 49, 45, 8 }, { 49, 49, 49, 0 },
{ 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 }, { 57, 57, 57, 0 },
{ 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 },
{ 57, 74, 65, 17 }, { 57, 74, 65, 17 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 57, 82, 69, 25 }, { 57, 82, 69, 25 }, { 66, 74, 70, 8 }, { 66, 74, 70, 8 },
{ 57, 90, 73, 33 }, { 57, 90, 73, 33 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 },
{ 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 },
{ 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 99, 94, 9 }, { 90, 99, 94, 9 }, { 90, 99, 94, 9 },
{ 90, 99, 94, 9 }, { 90, 107, 98, 17 }, { 90, 107, 98, 17 }, { 99, 99, 99, 0 }, { 99, 99, 99, 0 }, { 90, 115, 102, 25 }, { 90, 115, 102, 25 }, { 99, 107, 103, 8 },
{ 99, 107, 103, 8 }, { 90, 123, 106, 33 }, { 90, 123, 106, 33 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 115, 111, 8 }, { 107, 115, 111, 8 },
{ 107, 115, 111, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 123, 119, 8 }, { 115, 123, 119, 8 },
{ 115, 123, 119, 8 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 132, 127, 9 }, { 123, 132, 127, 9 },
{ 123, 132, 127, 9 }, { 123, 132, 127, 9 }, { 123, 140, 131, 17 }, { 123, 140, 131, 17 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 123, 148, 135, 25 }, { 123, 148, 135, 25 },
{ 132, 140, 136, 8 }, { 132, 140, 136, 8 }, { 123, 156, 139, 33 }, { 123, 156, 139, 33 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 148, 144, 8 },
{ 140, 148, 144, 8 }, { 140, 148, 144, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 156, 152, 8 },
{ 148, 156, 152, 8 }, { 148, 156, 152, 8 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 165, 160, 9 },
{ 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 173, 164, 17 }, { 156, 173, 164, 17 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 156, 181, 168, 25 },
{ 156, 181, 168, 25 }, { 165, 173, 169, 8 }, { 165, 173, 169, 8 }, { 156, 189, 172, 33 }, { 156, 189, 172, 33 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 },
{ 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 },
{ 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 },
{ 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 206, 197, 17 }, { 189, 206, 197, 17 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 },
{ 189, 214, 201, 25 }, { 189, 214, 201, 25 }, { 198, 206, 202, 8 }, { 198, 206, 202, 8 }, { 189, 222, 205, 33 }, { 189, 222, 205, 33 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 },
{ 206, 206, 206, 0 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 },
{ 214, 214, 214, 0 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 },
{ 222, 222, 222, 0 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 239, 230, 17 }, { 222, 239, 230, 17 }, { 231, 231, 231, 0 },
{ 231, 231, 231, 0 }, { 222, 247, 234, 25 }, { 222, 247, 234, 25 }, { 231, 239, 235, 8 }, { 231, 239, 235, 8 }, { 222, 255, 238, 33 }, { 222, 255, 238, 33 }, { 239, 239, 239, 0 },
{ 239, 239, 239, 0 }, { 239, 239, 239, 0 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
{ 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
};
// Single-color lookup table with 256 entries, presumably indexed by the 8-bit
// target channel value (each entry's m_actualColor is at or next to its index).
// Endpoint values here are all drawn from 0, 4, 8, ... 60, 65, 69, ... 255, which
// is consistent with 6-bit endpoints expanded to 8 bits; m_actualColor matches
// floor((m_min + m_max) / 2), i.e. the midpoint of the pair. Every entry has
// m_min <= m_max and spans go up to 65.
// NOTE(review): this is a mutable, non-static definition inside a header; it is
// only safe if exactly one translation unit includes the header - confirm.
TableEntry g_singleColor6_2[256] =
{
{ 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 4, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 8, 6, 4 }, { 8, 8, 8, 0 },
{ 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 12, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 16, 14, 4 }, { 16, 16, 16, 0 },
{ 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 20, 18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 24, 22, 4 }, { 24, 24, 24, 0 },
{ 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 28, 26, 4 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 32, 30, 4 }, { 32, 32, 32, 0 },
{ 32, 32, 32, 0 }, { 32, 32, 32, 0 }, { 32, 36, 34, 4 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 40, 38, 4 }, { 40, 40, 40, 0 },
{ 40, 40, 40, 0 }, { 40, 40, 40, 0 }, { 40, 44, 42, 4 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 48, 46, 4 }, { 48, 48, 48, 0 },
{ 48, 48, 48, 0 }, { 48, 48, 48, 0 }, { 48, 52, 50, 4 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 56, 54, 4 }, { 56, 56, 56, 0 },
{ 56, 56, 56, 0 }, { 56, 56, 56, 0 }, { 56, 60, 58, 4 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 65, 62, 5 }, { 60, 65, 62, 5 },
{ 60, 69, 64, 9 }, { 65, 65, 65, 0 }, { 60, 73, 66, 13 }, { 65, 69, 67, 4 }, { 60, 77, 68, 17 }, { 69, 69, 69, 0 }, { 60, 81, 70, 21 }, { 69, 73, 71, 4 },
{ 60, 85, 72, 25 }, { 73, 73, 73, 0 }, { 60, 89, 74, 29 }, { 73, 77, 75, 4 }, { 60, 93, 76, 33 }, { 77, 77, 77, 0 }, { 60, 97, 78, 37 }, { 77, 81, 79, 4 },
{ 60, 101, 80, 41 }, { 81, 81, 81, 0 }, { 60, 105, 82, 45 }, { 81, 85, 83, 4 }, { 60, 109, 84, 49 }, { 85, 85, 85, 0 }, { 60, 113, 86, 53 }, { 85, 89, 87, 4 },
{ 60, 117, 88, 57 }, { 89, 89, 89, 0 }, { 60, 121, 90, 61 }, { 89, 93, 91, 4 }, { 60, 125, 92, 65 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 93, 97, 95, 4 },
{ 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 101, 99, 4 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 105, 103, 4 },
{ 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 109, 107, 4 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 113, 111, 4 },
{ 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 117, 115, 4 }, { 117, 117, 117, 0 }, { 117, 117, 117, 0 }, { 117, 117, 117, 0 }, { 117, 121, 119, 4 },
{ 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 125, 123, 4 }, { 125, 125, 125, 0 }, { 125, 125, 125, 0 }, { 125, 125, 125, 0 }, { 125, 130, 127, 5 },
{ 125, 130, 127, 5 }, { 125, 134, 129, 9 }, { 130, 130, 130, 0 }, { 125, 138, 131, 13 }, { 130, 134, 132, 4 }, { 125, 142, 133, 17 }, { 134, 134, 134, 0 }, { 125, 146, 135, 21 },
{ 134, 138, 136, 4 }, { 125, 150, 137, 25 }, { 138, 138, 138, 0 }, { 125, 154, 139, 29 }, { 138, 142, 140, 4 }, { 125, 158, 141, 33 }, { 142, 142, 142, 0 }, { 125, 162, 143, 37 },
{ 142, 146, 144, 4 }, { 125, 166, 145, 41 }, { 146, 146, 146, 0 }, { 125, 170, 147, 45 }, { 146, 150, 148, 4 }, { 125, 174, 149, 49 }, { 150, 150, 150, 0 }, { 125, 178, 151, 53 },
{ 150, 154, 152, 4 }, { 125, 182, 153, 57 }, { 154, 154, 154, 0 }, { 125, 186, 155, 61 }, { 154, 158, 156, 4 }, { 125, 190, 157, 65 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 },
{ 158, 162, 160, 4 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 166, 164, 4 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 },
{ 166, 170, 168, 4 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 174, 172, 4 }, { 174, 174, 174, 0 }, { 174, 174, 174, 0 }, { 174, 174, 174, 0 },
{ 174, 178, 176, 4 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 182, 180, 4 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 },
{ 182, 186, 184, 4 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 190, 188, 4 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 },
{ 190, 195, 192, 5 }, { 190, 195, 192, 5 }, { 190, 199, 194, 9 }, { 195, 195, 195, 0 }, { 190, 203, 196, 13 }, { 195, 199, 197, 4 }, { 190, 207, 198, 17 }, { 199, 199, 199, 0 },
{ 190, 211, 200, 21 }, { 199, 203, 201, 4 }, { 190, 215, 202, 25 }, { 203, 203, 203, 0 }, { 190, 219, 204, 29 }, { 203, 207, 205, 4 }, { 190, 223, 206, 33 }, { 207, 207, 207, 0 },
{ 190, 227, 208, 37 }, { 207, 211, 209, 4 }, { 190, 231, 210, 41 }, { 211, 211, 211, 0 }, { 190, 235, 212, 45 }, { 211, 215, 213, 4 }, { 190, 239, 214, 49 }, { 215, 215, 215, 0 },
{ 190, 243, 216, 53 }, { 215, 219, 217, 4 }, { 190, 247, 218, 57 }, { 219, 219, 219, 0 }, { 190, 251, 220, 61 }, { 219, 223, 221, 4 }, { 190, 255, 222, 65 }, { 223, 223, 223, 0 },
{ 223, 223, 223, 0 }, { 223, 227, 225, 4 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 231, 229, 4 }, { 231, 231, 231, 0 }, { 231, 231, 231, 0 },
{ 231, 231, 231, 0 }, { 231, 235, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 239, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 },
{ 239, 239, 239, 0 }, { 239, 243, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 247, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
{ 247, 247, 247, 0 }, { 247, 251, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 255, 253, 4 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
};
// "_p" counterpart of g_singleColor5_3: 256 entries, presumably indexed by the
// 8-bit target channel value, with endpoints drawn from the 5-bit-expanded set
// (0, 8, 16, 24, 33, ... 255) and m_actualColor matching
// floor((2 * m_min + m_max) / 3). The entries appear to be the same as those of
// g_singleColor5_3 (largest m_span is 33 in both).
// NOTE(review): "_p" presumably selects the variant used by a stricter
// ("paranoid") encoding mode - confirm against the code that reads these tables.
// NOTE(review): this is a mutable, non-static definition inside a header; it is
// only safe if exactly one translation unit includes the header - confirm.
TableEntry g_singleColor5_3_p[256] =
{
{ 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 8, 2, 8 }, { 0, 8, 2, 8 }, { 8, 0, 5, 8 }, { 8, 0, 5, 8 }, { 8, 0, 5, 8 }, { 8, 8, 8, 0 },
{ 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 16, 10, 8 }, { 0, 33, 11, 33 }, { 16, 8, 13, 8 }, { 16, 8, 13, 8 }, { 16, 8, 13, 8 }, { 16, 16, 16, 0 },
{ 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 24, 18, 8 }, { 8, 41, 19, 33 }, { 24, 16, 21, 8 }, { 24, 16, 21, 8 }, { 33, 0, 22, 33 }, { 24, 24, 24, 0 },
{ 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 33, 27, 9 }, { 24, 33, 27, 9 }, { 24, 33, 27, 9 }, { 24, 41, 29, 17 }, { 33, 24, 30, 9 }, { 33, 24, 30, 9 },
{ 24, 49, 32, 25 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 33, 41, 35, 8 }, { 33, 41, 35, 8 }, { 41, 33, 38, 8 }, { 41, 33, 38, 8 }, { 41, 33, 38, 8 },
{ 49, 24, 40, 25 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 49, 43, 8 }, { 33, 66, 44, 33 }, { 49, 41, 46, 8 }, { 49, 41, 46, 8 }, { 49, 41, 46, 8 },
{ 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 57, 51, 8 }, { 41, 74, 52, 33 }, { 57, 49, 54, 8 }, { 57, 49, 54, 8 }, { 66, 33, 55, 33 },
{ 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 66, 60, 9 }, { 57, 66, 60, 9 }, { 57, 66, 60, 9 }, { 57, 74, 62, 17 }, { 66, 57, 63, 9 },
{ 66, 57, 63, 9 }, { 57, 82, 65, 25 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 66, 74, 68, 8 }, { 66, 74, 68, 8 }, { 74, 66, 71, 8 }, { 74, 66, 71, 8 },
{ 74, 66, 71, 8 }, { 82, 57, 73, 25 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 82, 76, 8 }, { 66, 99, 77, 33 }, { 82, 74, 79, 8 }, { 82, 74, 79, 8 },
{ 82, 74, 79, 8 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 90, 84, 8 }, { 74, 107, 85, 33 }, { 90, 82, 87, 8 }, { 90, 82, 87, 8 },
{ 99, 66, 88, 33 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 99, 93, 9 }, { 90, 99, 93, 9 }, { 90, 99, 93, 9 }, { 90, 107, 95, 17 },
{ 99, 90, 96, 9 }, { 99, 90, 96, 9 }, { 90, 115, 98, 25 }, { 99, 99, 99, 0 }, { 99, 99, 99, 0 }, { 99, 107, 101, 8 }, { 99, 107, 101, 8 }, { 107, 99, 104, 8 },
{ 107, 99, 104, 8 }, { 107, 99, 104, 8 }, { 115, 90, 106, 25 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 115, 109, 8 }, { 99, 132, 110, 33 }, { 115, 107, 112, 8 },
{ 115, 107, 112, 8 }, { 115, 107, 112, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 123, 117, 8 }, { 107, 140, 118, 33 }, { 123, 115, 120, 8 },
{ 123, 115, 120, 8 }, { 132, 99, 121, 33 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 132, 126, 9 }, { 123, 132, 126, 9 }, { 123, 132, 126, 9 },
{ 123, 140, 128, 17 }, { 132, 123, 129, 9 }, { 132, 123, 129, 9 }, { 123, 148, 131, 25 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 132, 140, 134, 8 }, { 132, 140, 134, 8 },
{ 140, 132, 137, 8 }, { 140, 132, 137, 8 }, { 140, 132, 137, 8 }, { 148, 123, 139, 25 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 148, 142, 8 }, { 132, 165, 143, 33 },
{ 148, 140, 145, 8 }, { 148, 140, 145, 8 }, { 148, 140, 145, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 156, 150, 8 }, { 140, 173, 151, 33 },
{ 156, 148, 153, 8 }, { 156, 148, 153, 8 }, { 165, 132, 154, 33 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 165, 159, 9 }, { 156, 165, 159, 9 },
{ 156, 165, 159, 9 }, { 156, 173, 161, 17 }, { 165, 156, 162, 9 }, { 165, 156, 162, 9 }, { 156, 181, 164, 25 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 165, 173, 167, 8 },
{ 165, 173, 167, 8 }, { 173, 165, 170, 8 }, { 173, 165, 170, 8 }, { 173, 165, 170, 8 }, { 181, 156, 172, 25 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 173, 181, 175, 8 },
{ 165, 198, 176, 33 }, { 181, 173, 178, 8 }, { 181, 173, 178, 8 }, { 181, 173, 178, 8 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 189, 183, 8 },
{ 173, 206, 184, 33 }, { 189, 181, 186, 8 }, { 189, 181, 186, 8 }, { 198, 165, 187, 33 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 198, 192, 9 },
{ 189, 198, 192, 9 }, { 189, 198, 192, 9 }, { 189, 206, 194, 17 }, { 198, 189, 195, 9 }, { 198, 189, 195, 9 }, { 189, 214, 197, 25 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 },
{ 198, 206, 200, 8 }, { 198, 206, 200, 8 }, { 206, 198, 203, 8 }, { 206, 198, 203, 8 }, { 206, 198, 203, 8 }, { 214, 189, 205, 25 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 },
{ 206, 214, 208, 8 }, { 198, 231, 209, 33 }, { 214, 206, 211, 8 }, { 214, 206, 211, 8 }, { 214, 206, 211, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 },
{ 214, 222, 216, 8 }, { 206, 239, 217, 33 }, { 222, 214, 219, 8 }, { 222, 214, 219, 8 }, { 231, 198, 220, 33 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 },
{ 222, 231, 225, 9 }, { 222, 231, 225, 9 }, { 222, 231, 225, 9 }, { 222, 239, 227, 17 }, { 231, 222, 228, 9 }, { 231, 222, 228, 9 }, { 222, 247, 230, 25 }, { 231, 231, 231, 0 },
{ 231, 231, 231, 0 }, { 231, 239, 233, 8 }, { 231, 239, 233, 8 }, { 239, 231, 236, 8 }, { 239, 231, 236, 8 }, { 239, 231, 236, 8 }, { 247, 222, 238, 25 }, { 239, 239, 239, 0 },
{ 239, 239, 239, 0 }, { 239, 247, 241, 8 }, { 239, 247, 241, 8 }, { 247, 239, 244, 8 }, { 247, 239, 244, 8 }, { 247, 239, 244, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
{ 247, 247, 247, 0 }, { 247, 255, 249, 8 }, { 247, 255, 249, 8 }, { 255, 247, 252, 8 }, { 255, 247, 252, 8 }, { 255, 247, 252, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
};
// "_p" counterpart of g_singleColor6_3: 256 entries, presumably indexed by the
// 8-bit target channel value, with endpoints drawn from the 6-bit-expanded set
// (0, 4, 8, ... 60, 65, ... 255) and m_actualColor matching
// floor((2 * m_min + m_max) / 3). Unlike g_singleColor6_3 (spans up to 69), no
// entry here has m_span above 33; where the other table uses a wide pair this one
// falls back to a narrow pair whose m_actualColor may be one off the index
// (e.g. index 23 is { 24, 24, 24, 0 } here versus { 0, 69, 23, 69 } there).
// NOTE(review): "_p" presumably selects the variant used by a stricter
// ("paranoid") encoding mode - confirm against the code that reads these tables.
// NOTE(review): this is a mutable, non-static definition inside a header; it is
// only safe if exactly one translation unit includes the header - confirm.
TableEntry g_singleColor6_3_p[256] =
{
{ 0, 0, 0, 0 }, { 0, 4, 1, 4 }, { 4, 0, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 8, 5, 4 }, { 8, 4, 6, 4 }, { 8, 8, 8, 0 },
{ 8, 8, 8, 0 }, { 8, 12, 9, 4 }, { 12, 8, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 16, 13, 4 }, { 16, 12, 14, 4 }, { 16, 16, 16, 0 },
{ 16, 16, 16, 0 }, { 16, 20, 17, 4 }, { 20, 16, 18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 24, 21, 4 }, { 24, 20, 22, 4 }, { 24, 24, 24, 0 },
{ 24, 24, 24, 0 }, { 24, 28, 25, 4 }, { 28, 24, 26, 4 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 32, 29, 4 }, { 32, 28, 30, 4 }, { 32, 32, 32, 0 },
{ 32, 32, 32, 0 }, { 32, 36, 33, 4 }, { 36, 32, 34, 4 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 40, 37, 4 }, { 40, 36, 38, 4 }, { 40, 40, 40, 0 },
{ 40, 40, 40, 0 }, { 40, 44, 41, 4 }, { 44, 40, 42, 4 }, { 32, 65, 43, 33 }, { 44, 44, 44, 0 }, { 44, 48, 45, 4 }, { 48, 44, 46, 4 }, { 36, 69, 47, 33 },
{ 48, 48, 48, 0 }, { 48, 52, 49, 4 }, { 52, 48, 50, 4 }, { 44, 65, 51, 21 }, { 52, 52, 52, 0 }, { 52, 56, 53, 4 }, { 56, 52, 54, 4 }, { 48, 69, 55, 21 },
{ 56, 56, 56, 0 }, { 56, 60, 57, 4 }, { 60, 56, 58, 4 }, { 56, 65, 59, 9 }, { 60, 60, 60, 0 }, { 60, 65, 61, 5 }, { 65, 56, 62, 9 }, { 65, 60, 63, 5 },
{ 60, 73, 64, 13 }, { 65, 65, 65, 0 }, { 65, 69, 66, 4 }, { 69, 65, 67, 4 }, { 73, 60, 68, 13 }, { 69, 69, 69, 0 }, { 69, 73, 70, 4 }, { 73, 69, 71, 4 },
{ 81, 56, 72, 25 }, { 73, 73, 73, 0 }, { 73, 77, 74, 4 }, { 77, 73, 75, 4 }, { 85, 60, 76, 25 }, { 77, 77, 77, 0 }, { 77, 81, 78, 4 }, { 81, 77, 79, 4 },
{ 81, 81, 81, 0 }, { 81, 81, 81, 0 }, { 81, 85, 82, 4 }, { 85, 81, 83, 4 }, { 85, 85, 85, 0 }, { 85, 85, 85, 0 }, { 85, 89, 86, 4 }, { 89, 85, 87, 4 },
{ 89, 89, 89, 0 }, { 89, 89, 89, 0 }, { 89, 93, 90, 4 }, { 93, 89, 91, 4 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 93, 97, 94, 4 }, { 97, 93, 95, 4 },
{ 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 101, 98, 4 }, { 101, 97, 99, 4 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 105, 102, 4 }, { 105, 101, 103, 4 },
{ 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 109, 106, 4 }, { 109, 105, 107, 4 }, { 97, 130, 108, 33 }, { 109, 109, 109, 0 }, { 109, 113, 110, 4 }, { 113, 109, 111, 4 },
{ 101, 134, 112, 33 }, { 113, 113, 113, 0 }, { 113, 117, 114, 4 }, { 117, 113, 115, 4 }, { 109, 130, 116, 21 }, { 117, 117, 117, 0 }, { 117, 121, 118, 4 }, { 121, 117, 119, 4 },
{ 113, 134, 120, 21 }, { 121, 121, 121, 0 }, { 121, 125, 122, 4 }, { 125, 121, 123, 4 }, { 121, 130, 124, 9 }, { 125, 125, 125, 0 }, { 125, 130, 126, 5 }, { 130, 121, 127, 9 },
{ 130, 125, 128, 5 }, { 125, 138, 129, 13 }, { 130, 130, 130, 0 }, { 130, 134, 131, 4 }, { 134, 130, 132, 4 }, { 138, 125, 133, 13 }, { 134, 134, 134, 0 }, { 134, 138, 135, 4 },
{ 138, 134, 136, 4 }, { 146, 121, 137, 25 }, { 138, 138, 138, 0 }, { 138, 142, 139, 4 }, { 142, 138, 140, 4 }, { 150, 125, 141, 25 }, { 142, 142, 142, 0 }, { 142, 146, 143, 4 },
{ 146, 142, 144, 4 }, { 146, 146, 146, 0 }, { 146, 146, 146, 0 }, { 146, 150, 147, 4 }, { 150, 146, 148, 4 }, { 150, 150, 150, 0 }, { 150, 150, 150, 0 }, { 150, 154, 151, 4 },
{ 154, 150, 152, 4 }, { 154, 154, 154, 0 }, { 154, 154, 154, 0 }, { 154, 158, 155, 4 }, { 158, 154, 156, 4 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 }, { 158, 162, 159, 4 },
{ 162, 158, 160, 4 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 166, 163, 4 }, { 166, 162, 164, 4 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 }, { 166, 170, 167, 4 },
{ 170, 166, 168, 4 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 174, 171, 4 }, { 174, 170, 172, 4 }, { 162, 195, 173, 33 }, { 174, 174, 174, 0 }, { 174, 178, 175, 4 },
{ 178, 174, 176, 4 }, { 166, 199, 177, 33 }, { 178, 178, 178, 0 }, { 178, 182, 179, 4 }, { 182, 178, 180, 4 }, { 174, 195, 181, 21 }, { 182, 182, 182, 0 }, { 182, 186, 183, 4 },
{ 186, 182, 184, 4 }, { 178, 199, 185, 21 }, { 186, 186, 186, 0 }, { 186, 190, 187, 4 }, { 190, 186, 188, 4 }, { 186, 195, 189, 9 }, { 190, 190, 190, 0 }, { 190, 195, 191, 5 },
{ 195, 186, 192, 9 }, { 195, 190, 193, 5 }, { 190, 203, 194, 13 }, { 195, 195, 195, 0 }, { 195, 199, 196, 4 }, { 199, 195, 197, 4 }, { 203, 190, 198, 13 }, { 199, 199, 199, 0 },
{ 199, 203, 200, 4 }, { 203, 199, 201, 4 }, { 211, 186, 202, 25 }, { 203, 203, 203, 0 }, { 203, 207, 204, 4 }, { 207, 203, 205, 4 }, { 215, 190, 206, 25 }, { 207, 207, 207, 0 },
{ 207, 211, 208, 4 }, { 211, 207, 209, 4 }, { 211, 211, 211, 0 }, { 211, 211, 211, 0 }, { 211, 215, 212, 4 }, { 215, 211, 213, 4 }, { 215, 215, 215, 0 }, { 215, 215, 215, 0 },
{ 215, 219, 216, 4 }, { 219, 215, 217, 4 }, { 219, 219, 219, 0 }, { 219, 219, 219, 0 }, { 219, 223, 220, 4 }, { 223, 219, 221, 4 }, { 223, 223, 223, 0 }, { 223, 223, 223, 0 },
{ 223, 227, 224, 4 }, { 227, 223, 225, 4 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 231, 228, 4 }, { 231, 227, 229, 4 }, { 231, 231, 231, 0 }, { 231, 231, 231, 0 },
{ 231, 235, 232, 4 }, { 235, 231, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 239, 236, 4 }, { 239, 235, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 },
{ 239, 243, 240, 4 }, { 243, 239, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 247, 244, 4 }, { 247, 243, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
{ 247, 251, 248, 4 }, { 251, 247, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 255, 252, 4 }, { 255, 251, 253, 4 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
};
// Single-color lookup table with one entry per 8-bit target value (index 0..255).
// In every entry below, the first two values come from the set 0, 8, 16, 24, 33, 41, ... 255
// (consistent with 5-bit levels expanded to 8 bits), the third value is the midpoint of the
// first two rounded down and approximates the entry's index, and the fourth value is the
// difference between the first two.
// NOTE(review): TableEntry's field names are declared outside this view, and the "_2" / "_p"
// suffixes presumably select the interpolation range and a precision variant - confirm
// against the encoder code that reads this table.
TableEntry g_singleColor5_2_p[256] =
{
{ 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 0, 8, 4, 8 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 },
{ 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 8, 16, 12, 8 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 },
{ 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 16, 24, 20, 8 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 },
{ 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 33, 28, 9 }, { 24, 41, 32, 17 },
{ 24, 41, 32, 17 }, { 33, 33, 33, 0 }, { 33, 33, 33, 0 }, { 24, 49, 36, 25 }, { 24, 49, 36, 25 }, { 33, 41, 37, 8 }, { 33, 41, 37, 8 }, { 24, 57, 40, 33 },
{ 24, 57, 40, 33 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 41, 41, 0 }, { 41, 49, 45, 8 }, { 41, 49, 45, 8 }, { 41, 49, 45, 8 }, { 49, 49, 49, 0 },
{ 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 49, 49, 0 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 }, { 49, 57, 53, 8 }, { 57, 57, 57, 0 },
{ 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 57, 57, 0 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 }, { 57, 66, 61, 9 },
{ 57, 74, 65, 17 }, { 57, 74, 65, 17 }, { 66, 66, 66, 0 }, { 66, 66, 66, 0 }, { 57, 82, 69, 25 }, { 57, 82, 69, 25 }, { 66, 74, 70, 8 }, { 66, 74, 70, 8 },
{ 57, 90, 73, 33 }, { 57, 90, 73, 33 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 74, 74, 0 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 }, { 74, 82, 78, 8 },
{ 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 82, 82, 0 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 }, { 82, 90, 86, 8 },
{ 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 90, 90, 0 }, { 90, 99, 94, 9 }, { 90, 99, 94, 9 }, { 90, 99, 94, 9 },
{ 90, 99, 94, 9 }, { 90, 107, 98, 17 }, { 90, 107, 98, 17 }, { 99, 99, 99, 0 }, { 99, 99, 99, 0 }, { 90, 115, 102, 25 }, { 90, 115, 102, 25 }, { 99, 107, 103, 8 },
{ 99, 107, 103, 8 }, { 90, 123, 106, 33 }, { 90, 123, 106, 33 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 107, 107, 0 }, { 107, 115, 111, 8 }, { 107, 115, 111, 8 },
{ 107, 115, 111, 8 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 115, 115, 0 }, { 115, 123, 119, 8 }, { 115, 123, 119, 8 },
{ 115, 123, 119, 8 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 123, 123, 0 }, { 123, 132, 127, 9 }, { 123, 132, 127, 9 },
{ 123, 132, 127, 9 }, { 123, 132, 127, 9 }, { 123, 140, 131, 17 }, { 123, 140, 131, 17 }, { 132, 132, 132, 0 }, { 132, 132, 132, 0 }, { 123, 148, 135, 25 }, { 123, 148, 135, 25 },
{ 132, 140, 136, 8 }, { 132, 140, 136, 8 }, { 123, 156, 139, 33 }, { 123, 156, 139, 33 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 140, 140, 0 }, { 140, 148, 144, 8 },
{ 140, 148, 144, 8 }, { 140, 148, 144, 8 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 148, 148, 0 }, { 148, 156, 152, 8 },
{ 148, 156, 152, 8 }, { 148, 156, 152, 8 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 156, 156, 0 }, { 156, 165, 160, 9 },
{ 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 165, 160, 9 }, { 156, 173, 164, 17 }, { 156, 173, 164, 17 }, { 165, 165, 165, 0 }, { 165, 165, 165, 0 }, { 156, 181, 168, 25 },
{ 156, 181, 168, 25 }, { 165, 173, 169, 8 }, { 165, 173, 169, 8 }, { 156, 189, 172, 33 }, { 156, 189, 172, 33 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 }, { 173, 173, 173, 0 },
{ 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 173, 181, 177, 8 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 }, { 181, 181, 181, 0 },
{ 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 181, 189, 185, 8 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 }, { 189, 189, 189, 0 },
{ 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 198, 193, 9 }, { 189, 206, 197, 17 }, { 189, 206, 197, 17 }, { 198, 198, 198, 0 }, { 198, 198, 198, 0 },
{ 189, 214, 201, 25 }, { 189, 214, 201, 25 }, { 198, 206, 202, 8 }, { 198, 206, 202, 8 }, { 189, 222, 205, 33 }, { 189, 222, 205, 33 }, { 206, 206, 206, 0 }, { 206, 206, 206, 0 },
{ 206, 206, 206, 0 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 206, 214, 210, 8 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 }, { 214, 214, 214, 0 },
{ 214, 214, 214, 0 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 214, 222, 218, 8 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 }, { 222, 222, 222, 0 },
{ 222, 222, 222, 0 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 231, 226, 9 }, { 222, 239, 230, 17 }, { 222, 239, 230, 17 }, { 231, 231, 231, 0 },
{ 231, 231, 231, 0 }, { 222, 247, 234, 25 }, { 222, 247, 234, 25 }, { 231, 239, 235, 8 }, { 231, 239, 235, 8 }, { 222, 255, 238, 33 }, { 222, 255, 238, 33 }, { 239, 239, 239, 0 },
{ 239, 239, 239, 0 }, { 239, 239, 239, 0 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 239, 247, 243, 8 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
{ 247, 247, 247, 0 }, { 247, 247, 247, 0 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 247, 255, 251, 8 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
};
// Single-color lookup table with one entry per 8-bit target value (index 0..255).
// In every entry below, the first two values come from the set 0, 4, 8, ... 60, 65, 69, ... 255
// (consistent with 6-bit levels expanded to 8 bits), the third value is the midpoint of the
// first two rounded down and approximates the entry's index, and the fourth value is the
// difference between the first two.
// NOTE(review): TableEntry's field names are declared outside this view, and the "_2" / "_p"
// suffixes presumably select the interpolation range and a precision variant - confirm
// against the encoder code that reads this table.
TableEntry g_singleColor6_2_p[256] =
{
{ 0, 0, 0, 0 }, { 0, 0, 0, 0 }, { 0, 4, 2, 4 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 4, 4, 0 }, { 4, 8, 6, 4 }, { 8, 8, 8, 0 },
{ 8, 8, 8, 0 }, { 8, 8, 8, 0 }, { 8, 12, 10, 4 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 12, 12, 0 }, { 12, 16, 14, 4 }, { 16, 16, 16, 0 },
{ 16, 16, 16, 0 }, { 16, 16, 16, 0 }, { 16, 20, 18, 4 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 20, 20, 0 }, { 20, 24, 22, 4 }, { 24, 24, 24, 0 },
{ 24, 24, 24, 0 }, { 24, 24, 24, 0 }, { 24, 28, 26, 4 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 28, 28, 0 }, { 28, 32, 30, 4 }, { 32, 32, 32, 0 },
{ 32, 32, 32, 0 }, { 32, 32, 32, 0 }, { 32, 36, 34, 4 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 36, 36, 0 }, { 36, 40, 38, 4 }, { 40, 40, 40, 0 },
{ 40, 40, 40, 0 }, { 40, 40, 40, 0 }, { 40, 44, 42, 4 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 44, 44, 0 }, { 44, 48, 46, 4 }, { 48, 48, 48, 0 },
{ 48, 48, 48, 0 }, { 48, 48, 48, 0 }, { 48, 52, 50, 4 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 52, 52, 0 }, { 52, 56, 54, 4 }, { 56, 56, 56, 0 },
{ 56, 56, 56, 0 }, { 56, 56, 56, 0 }, { 56, 60, 58, 4 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 60, 60, 0 }, { 60, 65, 62, 5 }, { 60, 65, 62, 5 },
{ 60, 69, 64, 9 }, { 65, 65, 65, 0 }, { 60, 73, 66, 13 }, { 65, 69, 67, 4 }, { 60, 77, 68, 17 }, { 69, 69, 69, 0 }, { 60, 81, 70, 21 }, { 69, 73, 71, 4 },
{ 60, 85, 72, 25 }, { 73, 73, 73, 0 }, { 60, 89, 74, 29 }, { 73, 77, 75, 4 }, { 60, 93, 76, 33 }, { 77, 77, 77, 0 }, { 77, 77, 77, 0 }, { 77, 81, 79, 4 },
{ 81, 81, 81, 0 }, { 81, 81, 81, 0 }, { 81, 81, 81, 0 }, { 81, 85, 83, 4 }, { 85, 85, 85, 0 }, { 85, 85, 85, 0 }, { 85, 85, 85, 0 }, { 85, 89, 87, 4 },
{ 89, 89, 89, 0 }, { 89, 89, 89, 0 }, { 89, 89, 89, 0 }, { 89, 93, 91, 4 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 93, 93, 93, 0 }, { 93, 97, 95, 4 },
{ 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 97, 97, 0 }, { 97, 101, 99, 4 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 101, 101, 0 }, { 101, 105, 103, 4 },
{ 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 105, 105, 0 }, { 105, 109, 107, 4 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 109, 109, 0 }, { 109, 113, 111, 4 },
{ 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 113, 113, 0 }, { 113, 117, 115, 4 }, { 117, 117, 117, 0 }, { 117, 117, 117, 0 }, { 117, 117, 117, 0 }, { 117, 121, 119, 4 },
{ 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 121, 121, 0 }, { 121, 125, 123, 4 }, { 125, 125, 125, 0 }, { 125, 125, 125, 0 }, { 125, 125, 125, 0 }, { 125, 130, 127, 5 },
{ 125, 130, 127, 5 }, { 125, 134, 129, 9 }, { 130, 130, 130, 0 }, { 125, 138, 131, 13 }, { 130, 134, 132, 4 }, { 125, 142, 133, 17 }, { 134, 134, 134, 0 }, { 125, 146, 135, 21 },
{ 134, 138, 136, 4 }, { 125, 150, 137, 25 }, { 138, 138, 138, 0 }, { 125, 154, 139, 29 }, { 138, 142, 140, 4 }, { 125, 158, 141, 33 }, { 142, 142, 142, 0 }, { 142, 142, 142, 0 },
{ 142, 146, 144, 4 }, { 146, 146, 146, 0 }, { 146, 146, 146, 0 }, { 146, 146, 146, 0 }, { 146, 150, 148, 4 }, { 150, 150, 150, 0 }, { 150, 150, 150, 0 }, { 150, 150, 150, 0 },
{ 150, 154, 152, 4 }, { 154, 154, 154, 0 }, { 154, 154, 154, 0 }, { 154, 154, 154, 0 }, { 154, 158, 156, 4 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 }, { 158, 158, 158, 0 },
{ 158, 162, 160, 4 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 162, 162, 0 }, { 162, 166, 164, 4 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 }, { 166, 166, 166, 0 },
{ 166, 170, 168, 4 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 170, 170, 0 }, { 170, 174, 172, 4 }, { 174, 174, 174, 0 }, { 174, 174, 174, 0 }, { 174, 174, 174, 0 },
{ 174, 178, 176, 4 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 178, 178, 0 }, { 178, 182, 180, 4 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 }, { 182, 182, 182, 0 },
{ 182, 186, 184, 4 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 186, 186, 0 }, { 186, 190, 188, 4 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 }, { 190, 190, 190, 0 },
{ 190, 195, 192, 5 }, { 190, 195, 192, 5 }, { 190, 199, 194, 9 }, { 195, 195, 195, 0 }, { 190, 203, 196, 13 }, { 195, 199, 197, 4 }, { 190, 207, 198, 17 }, { 199, 199, 199, 0 },
{ 190, 211, 200, 21 }, { 199, 203, 201, 4 }, { 190, 215, 202, 25 }, { 203, 203, 203, 0 }, { 190, 219, 204, 29 }, { 203, 207, 205, 4 }, { 190, 223, 206, 33 }, { 207, 207, 207, 0 },
{ 207, 207, 207, 0 }, { 207, 211, 209, 4 }, { 211, 211, 211, 0 }, { 211, 211, 211, 0 }, { 211, 211, 211, 0 }, { 211, 215, 213, 4 }, { 215, 215, 215, 0 }, { 215, 215, 215, 0 },
{ 215, 215, 215, 0 }, { 215, 219, 217, 4 }, { 219, 219, 219, 0 }, { 219, 219, 219, 0 }, { 219, 219, 219, 0 }, { 219, 223, 221, 4 }, { 223, 223, 223, 0 }, { 223, 223, 223, 0 },
{ 223, 223, 223, 0 }, { 223, 227, 225, 4 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 227, 227, 0 }, { 227, 231, 229, 4 }, { 231, 231, 231, 0 }, { 231, 231, 231, 0 },
{ 231, 231, 231, 0 }, { 231, 235, 233, 4 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 235, 235, 0 }, { 235, 239, 237, 4 }, { 239, 239, 239, 0 }, { 239, 239, 239, 0 },
{ 239, 239, 239, 0 }, { 239, 243, 241, 4 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 243, 243, 0 }, { 243, 247, 245, 4 }, { 247, 247, 247, 0 }, { 247, 247, 247, 0 },
{ 247, 247, 247, 0 }, { 247, 251, 249, 4 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 251, 251, 0 }, { 251, 255, 253, 4 }, { 255, 255, 255, 0 }, { 255, 255, 255, 0 },
};
}}}

View file

@ -0,0 +1,48 @@
/*
Convection Texture Tools
Copyright (c) 2018-2019 Eric Lasota
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject
to the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------------------
Portions based on DirectX Texture Library (DirectXTex)
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
http://go.microsoft.com/fwlink/?LinkId=248926
*/
#include "ConvectionKernels_Config.h"
// Single-translation-unit build: when CVTT_SINGLE_FILE is defined, this file
// textually includes every implementation .cpp listed below.
// CVTT_SINGLE_FILE_IMPL is defined first so that an implementation file guarded by
// `#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)` (as
// ConvectionKernels_Util.cpp is) emits its contents when included from here.
// NOTE(review): CVTT_SINGLE_FILE is presumably set by ConvectionKernels_Config.h or
// the build system, and the other .cpp files presumably use the same guard - confirm.
#if defined(CVTT_SINGLE_FILE)
#define CVTT_SINGLE_FILE_IMPL
#include "ConvectionKernels_API.cpp"
#include "ConvectionKernels_BC67.cpp"
#include "ConvectionKernels_BC6H_IO.cpp"
#include "ConvectionKernels_BC7_PrioData.cpp"
#include "ConvectionKernels_BCCommon.cpp"
#include "ConvectionKernels_ETC.cpp"
#include "ConvectionKernels_IndexSelector.cpp"
#include "ConvectionKernels_S3TC.cpp"
#include "ConvectionKernels_Util.cpp"
#endif

View file

@ -0,0 +1,121 @@
#pragma once
#include "ConvectionKernels_Util.h"
namespace cvtt
{
namespace Internal
{
template<int TVectorSize>
class UnfinishedEndpoints
{
public:
typedef ParallelMath::Float MFloat;
typedef ParallelMath::UInt16 MUInt16;
typedef ParallelMath::UInt15 MUInt15;
typedef ParallelMath::SInt16 MSInt16;
typedef ParallelMath::SInt32 MSInt32;
UnfinishedEndpoints()
{
}
UnfinishedEndpoints(const MFloat *base, const MFloat *offset)
{
for (int ch = 0; ch < TVectorSize; ch++)
m_base[ch] = base[ch];
for (int ch = 0; ch < TVectorSize; ch++)
m_offset[ch] = offset[ch];
}
UnfinishedEndpoints(const UnfinishedEndpoints& other)
{
for (int ch = 0; ch < TVectorSize; ch++)
m_base[ch] = other.m_base[ch];
for (int ch = 0; ch < TVectorSize; ch++)
m_offset[ch] = other.m_offset[ch];
}
void FinishHDRUnsigned(int tweak, int range, MSInt16 *outEP0, MSInt16 *outEP1, ParallelMath::RoundTowardNearestForScope *roundingMode)
{
float tweakFactors[2];
Util::ComputeTweakFactors(tweak, range, tweakFactors);
for (int ch = 0; ch < TVectorSize; ch++)
{
MUInt15 channelEPs[2];
for (int epi = 0; epi < 2; epi++)
{
MFloat f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[epi], 0.0f, 31743.0f);
channelEPs[epi] = ParallelMath::RoundAndConvertToU15(f, roundingMode);
}
outEP0[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(channelEPs[0]);
outEP1[ch] = ParallelMath::LosslessCast<MSInt16>::Cast(channelEPs[1]);
}
}
void FinishHDRSigned(int tweak, int range, MSInt16* outEP0, MSInt16* outEP1, ParallelMath::RoundTowardNearestForScope* roundingMode)
{
float tweakFactors[2];
Util::ComputeTweakFactors(tweak, range, tweakFactors);
for (int ch = 0; ch < TVectorSize; ch++)
{
MSInt16 channelEPs[2];
for (int epi = 0; epi < 2; epi++)
{
MFloat f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[epi], -31743.0f, 31743.0f);
channelEPs[epi] = ParallelMath::RoundAndConvertToS16(f, roundingMode);
}
outEP0[ch] = channelEPs[0];
outEP1[ch] = channelEPs[1];
}
}
void FinishLDR(int tweak, int range, MUInt15* outEP0, MUInt15* outEP1)
{
ParallelMath::RoundTowardNearestForScope roundingMode;
float tweakFactors[2];
Util::ComputeTweakFactors(tweak, range, tweakFactors);
for (int ch = 0; ch < TVectorSize; ch++)
{
MFloat ep0f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[0], 0.0f, 255.0f);
MFloat ep1f = ParallelMath::Clamp(m_base[ch] + m_offset[ch] * tweakFactors[1], 0.0f, 255.0f);
outEP0[ch] = ParallelMath::RoundAndConvertToU15(ep0f, &roundingMode);
outEP1[ch] = ParallelMath::RoundAndConvertToU15(ep1f, &roundingMode);
}
}
template<int TNewVectorSize>
UnfinishedEndpoints<TNewVectorSize> ExpandTo(float filler)
{
MFloat newBase[TNewVectorSize];
MFloat newOffset[TNewVectorSize];
for (int ch = 0; ch < TNewVectorSize && ch < TVectorSize; ch++)
{
newBase[ch] = m_base[ch];
newOffset[ch] = m_offset[ch];
}
MFloat fillerV = ParallelMath::MakeFloat(filler);
for (int ch = TVectorSize; ch < TNewVectorSize; ch++)
{
newBase[ch] = fillerV;
newOffset[ch] = ParallelMath::MakeFloatZero();
}
return UnfinishedEndpoints<TNewVectorSize>(newBase, newOffset);
}
private:
MFloat m_base[TVectorSize];
MFloat m_offset[TVectorSize];
};
}
}

View file

@ -0,0 +1,88 @@
/*
Convection Texture Tools
Copyright (c) 2018-2019 Eric Lasota
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject
to the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------------------
Portions based on DirectX Texture Library (DirectXTex)
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
http://go.microsoft.com/fwlink/?LinkId=248926
*/
#include "ConvectionKernels_Config.h"
#if !defined(CVTT_SINGLE_FILE) || defined(CVTT_SINGLE_FILE_IMPL)
#include "ConvectionKernels.h"
#include "ConvectionKernels_ParallelMath.h"
#include <algorithm>
namespace cvtt
{
namespace Util
{
// Re-biases signed 8-bit pixel blocks into unsigned space.
// Each channel value is clamped below at -127 and then shifted up by 127, so
// -128 and -127 both map to 0 and +127 maps to the maximum output of 254.
// All ParallelMath::ParallelSize blocks, 16 pixels each, 4 channels per pixel,
// are converted.
void BiasSignedInput(PixelBlockU8 inputNormalized[ParallelMath::ParallelSize], const PixelBlockS8 inputSigned[ParallelMath::ParallelSize])
{
    for (size_t blockIndex = 0; blockIndex < ParallelMath::ParallelSize; blockIndex++)
    {
        const PixelBlockS8& source = inputSigned[blockIndex];
        PixelBlockU8& destination = inputNormalized[blockIndex];

        for (size_t pixel = 0; pixel < 16; pixel++)
        {
            for (size_t channel = 0; channel < 4; channel++)
            {
                const int clamped = std::max<int>(source.m_pixels[pixel][channel], -127);
                destination.m_pixels[pixel][channel] = static_cast<uint8_t>(clamped + 127);
            }
        }
    }
}
// Writes the four per-channel error weights into channelWeights, in
// red, green, blue, alpha order. When the Flags::Uniform bit is set in
// options.flags every weight is 1.0; otherwise the weights are taken from
// the corresponding fields of `options`.
void FillWeights(const Options &options, float channelWeights[4])
{
    if (options.flags & Flags::Uniform)
    {
        for (int channel = 0; channel < 4; channel++)
            channelWeights[channel] = 1.0f;
        return;
    }

    channelWeights[0] = options.redWeight;
    channelWeights[1] = options.greenWeight;
    channelWeights[2] = options.blueWeight;
    channelWeights[3] = options.alphaWeight;
}
// Computes the two interpolation factors used to extend an endpoint pair.
//   tweak      - bit 1 adds one unit outside the minimum end, bit 0 adds one
//                unit outside the maximum end.
//   range      - number of steps; range - 1 units span the whole interval.
//   outFactors - receives two floats: [0] = -(units outside min) / (inside units),
//                [1] = (units outside max) / (inside units) + 1.
// NOTE(review): if range - 1 equals the number of outside units, the inside
// unit count is zero and the divisions produce non-finite values; callers are
// presumably expected to pass a range large enough to avoid that.
void ComputeTweakFactors(int tweak, int range, float *outFactors)
{
    const int outsideMin = (tweak >> 1) & 1;
    const int outsideMax = tweak & 1;
    const int inside = (range - 1) - outsideMin - outsideMax;

    const float insideUnits = static_cast<float>(inside);
    outFactors[0] = -static_cast<float>(outsideMin) / insideUnits;
    outFactors[1] = static_cast<float>(outsideMax) / insideUnits + 1.0f;
}
}
}
#endif

View file

@ -0,0 +1,21 @@
#pragma once
#include "ConvectionKernels_ParallelMath.h"
// Forward declarations of the types named in the Util function signatures below,
// where they appear only as array or reference parameters.
// NOTE(review): the full definitions presumably live in ConvectionKernels.h - confirm.
namespace cvtt
{
struct PixelBlockU8;
struct PixelBlockS8;
struct Options;
}
// Helper routines whose definitions are in ConvectionKernels_Util.cpp.
namespace cvtt
{
namespace Util
{
// Signed input blocks are converted into unsigned space, with the maximum value being 254
// (each channel is clamped below at -127, then 127 is added).
void BiasSignedInput(PixelBlockU8 inputNormalized[ParallelMath::ParallelSize], const PixelBlockS8 inputSigned[ParallelMath::ParallelSize]);
// Fills channelWeights with the red, green, blue and alpha weights from options,
// or with 1.0 for every channel when the Flags::Uniform bit is set in options.flags.
void FillWeights(const Options &options, float channelWeights[4]);
// Writes two floats to outFactors. Bit 1 of tweak places one unit outside the minimum
// end and bit 0 one unit outside the maximum end of the range - 1 total units:
// outFactors[0] = -(min outside units) / (inside units),
// outFactors[1] = (max outside units) / (inside units) + 1.
void ComputeTweakFactors(int tweak, int range, float *outFactors);
}
}

27
thirdparty/cvtt/etc_notes.txt vendored Normal file
View file

@ -0,0 +1,27 @@
The ETC1 compressor uses a modified cluster fit:
Assume that there exists an ideal base color and set of selectors for a given table.
For a given table and set of selectors, the ideal base color can be determined by subtracting the offsets from each pixel and averaging them.
Doing that is equivalent to subtracting the average offset from the average color.
Because positive and negative selectors of the same magnitude cancel out, the search space of possible average offsets is reduced: 57 unique offsets for the first table and 81 for the others.
Most of the offsets result in the same color as another average offset due to quantization of the base color, so those can be de-duplicated.
So:
- Start with a high-precision average color.
- Apply precomputed luma offsets to it.
- Quantize and de-duplicate the base colors.
- Find the ideal selectors for each base color.
Differential mode is solved by just finding the best legal combination from those attempts.
There are several scenarios where this is not ideal:
- Clamping behavior can sometimes be leveraged for a more accurate block.
- Differentials can sometimes be moved slightly closer to become legal.
- This only works when MSE is the error metric (i.e. not normal maps)
- This only works when pixel weights are of equal importance (i.e. not using weight by alpha or edge deblocking)
T and H modes work by computing a chrominance line, splitting the block's pixels into two clusters at the chrominance midpoint, and using those cluster assignments to determine the averages.
Planar mode is just solved algebraically.
If you want to emulate etc2comp's default settings, add the flag ETC_UseFakeBT709 to use its modified Rec. 709 error coefficients.
Doing that will significantly slow down encoding because it requires much more complicated quantization math.