virtualx-engine/thirdparty/basis_universal/encoder/basisu_resampler.cpp

852 lines
19 KiB
C++

// basisu_resampler.cpp
// Copyright (C) 2019 Binomial LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "basisu_resampler.h"
#include "basisu_resampler_filters.h"
#ifndef max
#define max(a, b) (((a) > (b)) ? (a) : (b))
#endif
#ifndef min
#define min(a, b) (((a) < (b)) ? (a) : (b))
#endif
#define RESAMPLER_DEBUG 0
namespace basisu
{
// Debug-only bounds check: asserts that v lies in [0, h) and returns v unchanged,
// so the call can be wrapped directly around an array index expression.
static inline int resampler_range_check(int v, int h)
{
	// h is otherwise only read by the assert below, which disappears when NDEBUG is defined.
	BASISU_NOTE_UNUSED(h);

	assert((0 <= v) && (h > v));

	return v;
}
// Converts a Resample_Real to int, discarding the fractional part (truncation toward zero).
static inline int cast_to_int(Resample_Real i)
{
	return static_cast<int>(i);
}
// Maps a (possibly out-of-range) source sample index j onto the valid range [0, src_x).
// In-range indices are returned untouched. Out-of-range indices are handled per boundary_op:
//   BOUNDARY_REFLECT - mirrored back across the violated edge, then clamped to the valid range.
//   BOUNDARY_WRAP    - wrapped with posmod(j, src_x).
//   anything else    - clamped to the nearest edge (0 or src_x - 1).
int Resampler::reflect(const int j, const int src_x, const Boundary_Op boundary_op)
{
	// Fast path: nothing to fix up.
	if ((j >= 0) && (j < src_x))
		return j;

	// From here on j is either below 0 or at/after src_x.
	const bool below = (j < 0);

	switch (boundary_op)
	{
	case BOUNDARY_REFLECT:
	{
		if (below)
		{
			// Mirror around sample 0; a mirror that overshoots the far edge is clamped to it.
			const int mirrored = -j;
			return (mirrored >= src_x) ? (src_x - 1) : mirrored;
		}

		// Mirror around the last sample; a mirror that overshoots sample 0 is clamped to it.
		const int mirrored = (src_x - j) + (src_x - 1);
		return (mirrored < 0) ? 0 : mirrored;
	}
	case BOUNDARY_WRAP:
		return posmod(j, src_x);
	default:
		return below ? 0 : (src_x - 1);
	}
}
// Builds, for every destination sample along one axis, the list of source samples
// that contribute to it with a non-zero weight.
//
// Parameters:
//   src_x, dst_x    - number of source / destination samples along the axis.
//   boundary_op     - forwarded to reflect() to remap taps that fall outside [0, src_x).
//   Pfilter         - filter kernel, evaluated at the scaled distance between a tap and the center.
//   filter_support  - half-width of the kernel before any scaling is applied.
//   filter_scale    - multiplies the half-width and divides the kernel argument.
//   src_ofs         - offset added to every source-space center.
//
// Returns a calloc'd array of dst_x Contrib_List entries. All of their p pointers point into
// one shared calloc'd Contrib pool whose base address is entry [0].p. Returns NULL when an
// allocation fails or when some destination sample ends up with no contributors.
//
// Minification (dst_x < src_x) and magnification used to be two near-identical code paths.
// They differ only in the factor applied to the kernel argument and half-width: xscale when
// minifying, nothing when magnifying. Using a factor of exactly 1 in the latter case keeps the
// arithmetic identical (multiplying or dividing by 1 is exact) while sharing one implementation.
Resampler::Contrib_List* Resampler::make_clist(
	int src_x, int dst_x, Boundary_Op boundary_op,
	Resample_Real(*Pfilter)(Resample_Real),
	Resample_Real filter_support,
	Resample_Real filter_scale,
	Resample_Real src_ofs)
{
	struct Contrib_Bounds
	{
		// The center of the range in DISCRETE coordinates (pixel center = 0.0f).
		Resample_Real center;
		int left, right;
	};

	Contrib_List* Pcontrib = (Contrib_List*)calloc(dst_x, sizeof(Contrib_List));
	if (!Pcontrib)
		return NULL;

	Contrib_Bounds* Pcontrib_bounds = (Contrib_Bounds*)calloc(dst_x, sizeof(Contrib_Bounds));
	if (!Pcontrib_bounds)
	{
		free(Pcontrib);
		return NULL;
	}

	const Resample_Real oo_filter_scale = 1.0f / filter_scale;
	const Resample_Real NUDGE = 0.5f;
	const Resample_Real xscale = dst_x / (Resample_Real)src_x;

	// When minifying, the kernel is stretched by 1/xscale so it covers every source sample
	// that maps onto a destination sample. When magnifying it is used at its native width.
	const Resample_Real kernel_scale = (xscale < 1.0f) ? xscale : (Resample_Real)1.0f;

	// Half width of the (possibly stretched) filter, in source samples.
	const Resample_Real half_width = (filter_support / kernel_scale) * filter_scale;

	// Pass 1: find the range of source sample(s) that may contribute to each destination
	// sample, and count the total number of candidate taps so the pool can be sized.
	int total = 0;
	for (int i = 0; i < dst_x; i++)
	{
		// Convert from discrete to continuous coordinates, scale, then convert back to discrete.
		Resample_Real center = ((Resample_Real)i + NUDGE) / xscale;
		center -= NUDGE;
		center += src_ofs;

		const int left = cast_to_int((Resample_Real)floor(center - half_width));
		const int right = cast_to_int((Resample_Real)ceil(center + half_width));

		Pcontrib_bounds[i].center = center;
		Pcontrib_bounds[i].left = left;
		Pcontrib_bounds[i].right = right;

		total += (right - left + 1);
	}

	// Allocate one pool holding the contributors of every destination sample.
	Contrib* Pcpool = NULL;
	if ((total == 0) || ((Pcpool = (Contrib*)calloc(total, sizeof(Contrib))) == NULL))
	{
		free(Pcontrib);
		free(Pcontrib_bounds);
		return NULL;
	}

	// Pass 2: create the list of source samples which contribute to each destination sample.
	Contrib* Pcpool_next = Pcpool;
	for (int i = 0; i < dst_x; i++)
	{
		int max_k = -1;
		Resample_Real max_w = -1e+20f;

		const Resample_Real center = Pcontrib_bounds[i].center;
		const int left = Pcontrib_bounds[i].left;
		const int right = Pcontrib_bounds[i].right;

		// Carve this sample's slice out of the pool. It is sized for every candidate tap,
		// although zero-weight taps are skipped below.
		Pcontrib[i].n = 0;
		Pcontrib[i].p = Pcpool_next;
		Pcpool_next += (right - left + 1);

		assert((Pcpool_next - Pcpool) <= total);

		// First sweep: sum the raw kernel values to obtain the normalization factor.
		// NOTE(review): a raw sum of exactly zero makes norm non-finite; the original code
		// behaved the same way, so this is left unchanged.
		Resample_Real total_weight = 0;
		for (int j = left; j <= right; j++)
			total_weight += (*Pfilter)((center - (Resample_Real)j) * kernel_scale * oo_filter_scale);

		const Resample_Real norm = static_cast<Resample_Real>(1.0f / total_weight);

		total_weight = 0;

#if RESAMPLER_DEBUG
		printf("%i: ", i);
#endif

		// Second sweep: store every tap with a non-zero normalized weight.
		for (int j = left; j <= right; j++)
		{
			const Resample_Real weight = (*Pfilter)((center - (Resample_Real)j) * kernel_scale * oo_filter_scale) * norm;
			if (weight == 0.0f)
				continue;

			// Remap taps lying outside the source range according to the boundary mode.
			const int n = reflect(j, src_x, boundary_op);

#if RESAMPLER_DEBUG
			printf("%i(%f), ", n, weight);
#endif

			// Increment the number of source samples which contribute to the current destination sample.
			const int k = Pcontrib[i].n++;

			Pcontrib[i].p[k].pixel = (unsigned short)n; /* store src sample number */
			Pcontrib[i].p[k].weight = weight; /* store src sample weight */

			total_weight += weight; /* total weight of all contributors */

			// Remember the heaviest tap; it absorbs the rounding error below.
			if (weight > max_w)
			{
				max_w = weight;
				max_k = k;
			}
		}

#if RESAMPLER_DEBUG
		printf("\n\n");
#endif

		// A destination sample without any contributor cannot be resampled: fail the whole list.
		if ((max_k == -1) || (Pcontrib[i].n == 0))
		{
			free(Pcpool);
			free(Pcontrib);
			free(Pcontrib_bounds);
			return NULL;
		}

		// Fold the accumulated rounding error into the largest weight so the stored
		// weights sum to 1 as closely as possible.
		if (total_weight != 1.0f)
			Pcontrib[i].p[max_k].weight += 1.0f - total_weight;
	}

#if RESAMPLER_DEBUG
	printf("*******\n");
#endif

	free(Pcontrib_bounds);

	return Pcontrib;
}
// Resamples one row along X: each of the m_resample_dst_x output samples is the weighted sum
// of the Psrc samples named by the matching entry of the X contributor list (m_Pclist_x).
void Resampler::resample_x(Sample* Pdst, const Sample* Psrc)
{
	assert(Pdst);
	assert(Psrc);

	for (int x = 0; x < m_resample_dst_x; x++)
	{
		const Contrib_List& list = m_Pclist_x[x];

#if BASISU_RESAMPLER_DEBUG_OPS
		total_ops += list.n;
#endif

		// Accumulate the contributors in list order.
		Sample sum = 0;
		for (int k = 0; k < list.n; k++)
			sum += Psrc[list.p[k].pixel] * list.p[k].weight;

		Pdst[x] = sum;
	}
}
// Writes Psrc[x] * weight into Ptmp[x] for the first dst_x samples.
// This is an assignment rather than an accumulation because the caller does not clear
// the temporary buffer beforehand.
void Resampler::scale_y_mov(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x)
{
#if BASISU_RESAMPLER_DEBUG_OPS
	total_ops += dst_x;
#endif

	for (int x = 0; x < dst_x; x++)
		Ptmp[x] = Psrc[x] * weight;
}
// Accumulates Psrc[x] * weight into Ptmp[x] for the first dst_x samples.
void Resampler::scale_y_add(Sample* Ptmp, const Sample* Psrc, Resample_Real weight, int dst_x)
{
#if BASISU_RESAMPLER_DEBUG_OPS
	total_ops += dst_x;
#endif

	for (int x = 0; x < dst_x; x++)
		Ptmp[x] += Psrc[x] * weight;
}
// Replaces each of the first n samples of Pdst, in place, with clamp_sample() of its value.
// Does nothing when n is zero or negative.
void Resampler::clamp(Sample* Pdst, int n)
{
	for (int idx = 0; idx < n; idx++)
	{
		const Sample value = Pdst[idx];
		Pdst[idx] = clamp_sample(value);
	}
}
// Produces destination line m_cur_dst_y by blending the buffered source lines named in its
// Y contributor list, then (if X resampling was delayed) resampling the blended line along X.
// The result is written to Pdst and clamped when m_lo < m_hi.
void Resampler::resample_y(Sample* Pdst)
{
	Contrib_List* Pclist = &m_Pclist_y[m_cur_dst_y];

	// With delayed X resampling the vertical blend goes to the temp buffer first;
	// otherwise it is written straight to the output.
	Sample* Ptmp = m_delay_x_resample ? m_Ptmp_buf : Pdst;
	assert(Ptmp);

	/* Process each contributor. */
	for (int c = 0; c < Pclist->n; c++)
	{
		const Contrib& contrib = Pclist->p[c];

		// locate the contributor's location in the scan buffer -- the contributor must always be found!
		int slot = 0;
		while ((slot < MAX_SCAN_BUF_SIZE) && (m_Pscan_buf->scan_buf_y[slot] != contrib.pixel))
			slot++;

		assert(slot < MAX_SCAN_BUF_SIZE);

		Sample* Psrc = m_Pscan_buf->scan_buf_l[slot];

		// The first contributor overwrites the buffer, all following ones accumulate into it.
		if (c == 0)
			scale_y_mov(Ptmp, Psrc, contrib.weight, m_intermediate_x);
		else
			scale_y_add(Ptmp, Psrc, contrib.weight, m_intermediate_x);

		/* If this source line doesn't contribute to any
		 * more destination lines then mark the scanline buffer slot
		 * which holds this source line as free.
		 * (The max. number of slots used depends on the Y
		 * axis sampling factor and the scaled filter width.)
		 */
		const int src_line = resampler_range_check(contrib.pixel, m_resample_src_y);
		if (--m_Psrc_y_count[src_line] == 0)
		{
			m_Psrc_y_flag[src_line] = false;
			m_Pscan_buf->scan_buf_y[slot] = -1;
		}
	}

	/* Now generate the destination line */
	if (m_delay_x_resample) // Was X resampling delayed until after Y resampling?
	{
		assert(Pdst != Ptmp);
		resample_x(Pdst, Ptmp);
	}
	else
	{
		assert(Pdst == Ptmp);
	}

	// Clamping is only performed when a non-empty [m_lo, m_hi] range was supplied.
	if (m_lo < m_hi)
		clamp(Pdst, m_resample_dst_x);
}
bool Resampler::put_line(const Sample * Psrc)
{
int i;
if (m_cur_src_y >= m_resample_src_y)
return false;
/* Does this source line contribute
* to any destination line? if not,
* exit now.
*/
if (!m_Psrc_y_count[resampler_range_check(m_cur_src_y, m_resample_src_y)])
{
m_cur_src_y++;
return true;
}
/* Find an empty slot in the scanline buffer. (FIXME: Perf. is terrible here with extreme scaling ratios.) */
for (i = 0; i < MAX_SCAN_BUF_SIZE; i++)
if (m_Pscan_buf->scan_buf_y[i] == -1)
break;
/* If the buffer is full, exit with an error. */
if (i == MAX_SCAN_BUF_SIZE)
{
m_status = STATUS_SCAN_BUFFER_FULL;
return false;
}
m_Psrc_y_flag[resampler_range_check(m_cur_src_y, m_resample_src_y)] = true;
m_Pscan_buf->scan_buf_y[i] = m_cur_src_y;
/* Does this slot have any memory allocated to it? */
if (!m_Pscan_buf->scan_buf_l[i])
{
if ((m_Pscan_buf->scan_buf_l[i] = (Sample*)malloc(m_intermediate_x * sizeof(Sample))) == NULL)
{
m_status = STATUS_OUT_OF_MEMORY;
return false;
}
}
// Resampling on the X axis first?
if (m_delay_x_resample)
{
assert(m_intermediate_x == m_resample_src_x);
// Y-X resampling order
memcpy(m_Pscan_buf->scan_buf_l[i], Psrc, m_intermediate_x * sizeof(Sample));
}
else
{
assert(m_intermediate_x == m_resample_dst_x);
// X-Y resampling order
resample_x(m_Pscan_buf->scan_buf_l[i], Psrc);
}
m_cur_src_y++;
return true;
}
// Returns the next finished destination line, or NULL when either every destination line has
// already been produced or some source line it depends on has not been supplied yet.
// The returned pointer is the internal m_Pdst_buf buffer.
const Resampler::Sample* Resampler::get_line()
{
	/* If all the destination lines have been
	 * generated, then always return NULL.
	 */
	if (m_cur_dst_y == m_resample_dst_y)
		return NULL;

	/* Check to see if all the required
	 * contributors are present, if not,
	 * return NULL.
	 */
	const Contrib_List& needed = m_Pclist_y[m_cur_dst_y];
	for (int k = 0; k < needed.n; k++)
	{
		const int src_line = resampler_range_check(needed.p[k].pixel, m_resample_src_y);
		if (!m_Psrc_y_flag[src_line])
			return NULL;
	}

	resample_y(m_Pdst_buf);

	m_cur_dst_y++;

	return m_Pdst_buf;
}
// Releases every buffer owned by the resampler and nulls the corresponding members.
Resampler::~Resampler()
{
#if BASISU_RESAMPLER_DEBUG_OPS
	printf("actual ops: %i\n", total_ops);
#endif

	// free() accepts NULL, so the plain buffers need no guard.
	free(m_Pdst_buf);
	m_Pdst_buf = NULL;

	free(m_Ptmp_buf);
	m_Ptmp_buf = NULL;

	/* Don't deallocate a contributor list
	 * if the user passed us one of their own.
	 */
	if (!m_clist_x_forced && m_Pclist_x)
	{
		// Entry [0].p is the base of the single pool shared by every entry.
		free(m_Pclist_x->p);
		free(m_Pclist_x);
		m_Pclist_x = NULL;
	}

	if (!m_clist_y_forced && m_Pclist_y)
	{
		free(m_Pclist_y->p);
		free(m_Pclist_y);
		m_Pclist_y = NULL;
	}

	free(m_Psrc_y_count);
	m_Psrc_y_count = NULL;

	free(m_Psrc_y_flag);
	m_Psrc_y_flag = NULL;

	if (m_Pscan_buf)
	{
		// Release every buffered scanline before the scan buffer itself.
		for (int slot = 0; slot < MAX_SCAN_BUF_SIZE; slot++)
			free(m_Pscan_buf->scan_buf_l[slot]);

		free(m_Pscan_buf);
		m_Pscan_buf = NULL;
	}
}
void Resampler::restart()
{
if (STATUS_OKAY != m_status)
return;
m_cur_src_y = m_cur_dst_y = 0;
int i, j;
for (i = 0; i < m_resample_src_y; i++)
{
m_Psrc_y_count[i] = 0;
m_Psrc_y_flag[i] = false;
}
for (i = 0; i < m_resample_dst_y; i++)
{
for (j = 0; j < m_Pclist_y[i].n; j++)
m_Psrc_y_count[resampler_range_check(m_Pclist_y[i].p[j].pixel, m_resample_src_y)]++;
}
for (i = 0; i < MAX_SCAN_BUF_SIZE; i++)
{
m_Pscan_buf->scan_buf_y[i] = -1;
free(m_Pscan_buf->scan_buf_l[i]);
m_Pscan_buf->scan_buf_l[i] = NULL;
}
}
// Sets up a separable resampler that converts src_x by src_y images into dst_x by dst_y images.
//
// Parameters:
//   src_x, src_y, dst_x, dst_y  - source and destination dimensions; all must be > 0 (asserted).
//   boundary_op                 - how filter taps outside the source image are remapped.
//   sample_low, sample_high     - output clamp range; output is only clamped when sample_low < sample_high.
//   Pfilter_name                - name of an entry in g_resample_filters; NULL selects
//                                 BASISU_RESAMPLER_DEFAULT_FILTER.
//   Pclist_x, Pclist_y          - optional caller-owned contributor lists. When non-NULL they are
//                                 used as-is and are not freed by the destructor.
//   filter_x_scale, filter_y_scale, src_x_ofs, src_y_ofs
//                               - forwarded to make_clist() for the respective axis.
//
// Nothing is returned. On failure m_status is set to STATUS_OUT_OF_MEMORY or
// STATUS_BAD_FILTER_NAME and construction stops early; on success it stays STATUS_OKAY.
Resampler::Resampler(int src_x, int src_y,
	int dst_x, int dst_y,
	Boundary_Op boundary_op,
	Resample_Real sample_low, Resample_Real sample_high,
	const char* Pfilter_name,
	Contrib_List* Pclist_x,
	Contrib_List* Pclist_y,
	Resample_Real filter_x_scale,
	Resample_Real filter_y_scale,
	Resample_Real src_x_ofs,
	Resample_Real src_y_ofs)
{
	int i, j;
	Resample_Real support, (*func)(Resample_Real);

	assert(src_x > 0);
	assert(src_y > 0);
	assert(dst_x > 0);
	assert(dst_y > 0);

#if BASISU_RESAMPLER_DEBUG_OPS
	total_ops = 0;
#endif

	m_lo = sample_low;
	m_hi = sample_high;

	// Put every member into a known state first, so the destructor is safe on any early return.
	m_delay_x_resample = false;
	m_intermediate_x = 0;
	m_Pdst_buf = NULL;
	m_Ptmp_buf = NULL;
	m_clist_x_forced = false;
	m_Pclist_x = NULL;
	m_clist_y_forced = false;
	m_Pclist_y = NULL;
	m_Psrc_y_count = NULL;
	m_Psrc_y_flag = NULL;
	m_Pscan_buf = NULL;
	m_status = STATUS_OKAY;

	m_resample_src_x = src_x;
	m_resample_src_y = src_y;
	m_resample_dst_x = dst_x;
	m_resample_dst_y = dst_y;

	m_boundary_op = boundary_op;

	// Buffer holding one finished destination line.
	if ((m_Pdst_buf = (Sample*)malloc(m_resample_dst_x * sizeof(Sample))) == NULL)
	{
		m_status = STATUS_OUT_OF_MEMORY;
		return;
	}

	// Find the specified filter.
	if (Pfilter_name == NULL)
		Pfilter_name = BASISU_RESAMPLER_DEFAULT_FILTER;

	for (i = 0; i < g_num_resample_filters; i++)
		if (strcmp(Pfilter_name, g_resample_filters[i].name) == 0)
			break;

	if (i == g_num_resample_filters)
	{
		m_status = STATUS_BAD_FILTER_NAME;
		return;
	}

	func = g_resample_filters[i].func;
	support = g_resample_filters[i].support;

	/* Create contributor lists, unless the user supplied custom lists. */
	if (!Pclist_x)
	{
		m_Pclist_x = make_clist(m_resample_src_x, m_resample_dst_x, m_boundary_op, func, support, filter_x_scale, src_x_ofs);
		if (!m_Pclist_x)
		{
			m_status = STATUS_OUT_OF_MEMORY;
			return;
		}
	}
	else
	{
		m_Pclist_x = Pclist_x;
		m_clist_x_forced = true;
	}

	if (!Pclist_y)
	{
		m_Pclist_y = make_clist(m_resample_src_y, m_resample_dst_y, m_boundary_op, func, support, filter_y_scale, src_y_ofs);
		if (!m_Pclist_y)
		{
			m_status = STATUS_OUT_OF_MEMORY;
			return;
		}
	}
	else
	{
		m_Pclist_y = Pclist_y;
		m_clist_y_forced = true;
	}

	if ((m_Psrc_y_count = (int*)calloc(m_resample_src_y, sizeof(int))) == NULL)
	{
		m_status = STATUS_OUT_OF_MEMORY;
		return;
	}

	if ((m_Psrc_y_flag = (unsigned char*)calloc(m_resample_src_y, sizeof(unsigned char))) == NULL)
	{
		m_status = STATUS_OUT_OF_MEMORY;
		return;
	}

	// Count how many times each source line contributes to a destination line.
	for (i = 0; i < m_resample_dst_y; i++)
		for (j = 0; j < m_Pclist_y[i].n; j++)
			m_Psrc_y_count[resampler_range_check(m_Pclist_y[i].p[j].pixel, m_resample_src_y)]++;

	if ((m_Pscan_buf = (Scan_Buf*)malloc(sizeof(Scan_Buf))) == NULL)
	{
		m_status = STATUS_OUT_OF_MEMORY;
		return;
	}

	// Every scan buffer slot starts out free (-1) with no line storage attached.
	for (i = 0; i < MAX_SCAN_BUF_SIZE; i++)
	{
		m_Pscan_buf->scan_buf_y[i] = -1;
		m_Pscan_buf->scan_buf_l[i] = NULL;
	}

	m_cur_src_y = m_cur_dst_y = 0;

	{
		// Determine which axis to resample first by comparing the number of multiplies required
		// for each possibility.
		//
		// The estimates are products of a per-line op count and an image dimension, which can
		// exceed the range of int for large images or wide filters. Signed overflow is undefined
		// behaviour and could select the wrong order, so the arithmetic is done in long long.
		const long long x_ops = count_ops(m_Pclist_x, m_resample_dst_x);
		const long long y_ops = count_ops(m_Pclist_y, m_resample_dst_y);

		// Hack 10/2000: Weight Y axis ops a little more than X axis ops.
		// (Y axis ops use more cache resources.)
		const long long xy_ops = x_ops * m_resample_src_y +
			(4 * y_ops * m_resample_dst_x) / 3;

		const long long yx_ops = (4 * y_ops * m_resample_src_x) / 3 +
			x_ops * m_resample_dst_y;

#if BASISU_RESAMPLER_DEBUG_OPS
		printf("src: %i %i\n", m_resample_src_x, m_resample_src_y);
		printf("dst: %i %i\n", m_resample_dst_x, m_resample_dst_y);
		printf("x_ops: %lld\n", x_ops);
		printf("y_ops: %lld\n", y_ops);
		printf("xy_ops: %lld\n", xy_ops);
		printf("yx_ops: %lld\n", yx_ops);
#endif

		// Now check which resample order is better. In case of a tie, choose the order
		// which buffers the least amount of data.
		if ((xy_ops > yx_ops) ||
			((xy_ops == yx_ops) && (m_resample_src_x < m_resample_dst_x)))
		{
			m_delay_x_resample = true;
			m_intermediate_x = m_resample_src_x;
		}
		else
		{
			m_delay_x_resample = false;
			m_intermediate_x = m_resample_dst_x;
		}

#if BASISU_RESAMPLER_DEBUG_OPS
		printf("delaying: %i\n", m_delay_x_resample);
#endif
	}

	// The Y-then-X order needs a scratch line to hold the vertically blended samples
	// before they are resampled along X.
	if (m_delay_x_resample)
	{
		if ((m_Ptmp_buf = (Sample*)malloc(m_intermediate_x * sizeof(Sample))) == NULL)
		{
			m_status = STATUS_OUT_OF_MEMORY;
			return;
		}
	}
}
// Hands out the contributor lists currently used for the X and Y axes.
// Either output pointer may be NULL, in which case that list is not reported.
void Resampler::get_clists(Contrib_List** ptr_clist_x, Contrib_List** ptr_clist_y)
{
	if (ptr_clist_x != NULL)
	{
		*ptr_clist_x = m_Pclist_x;
	}

	if (ptr_clist_y != NULL)
	{
		*ptr_clist_y = m_Pclist_y;
	}
}
// Returns the number of entries in the filter table (g_num_resample_filters).
int Resampler::get_filter_num()
{
	const int filter_count = g_num_resample_filters;
	return filter_count;
}
// Returns the name of filter number filter_num from g_resample_filters,
// or NULL when filter_num is outside [0, g_num_resample_filters).
const char* Resampler::get_filter_name(int filter_num)
{
	if ((filter_num >= 0) && (filter_num < g_num_resample_filters))
		return g_resample_filters[filter_num].name;

	return NULL;
}
} // namespace basisu