Merge pull request #57102 from akien-mga/libwebp-1.2.2

This commit is contained in:
Rémi Verschelde 2022-02-03 15:16:57 +01:00 committed by GitHub
commit 6acbd5f774
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
28 changed files with 225 additions and 139 deletions

View file

@ -309,7 +309,7 @@ Files extracted from upstream source:
## libwebp
- Upstream: https://chromium.googlesource.com/webm/libwebp/
- Version: 1.2.1 (9ce5843dbabcfd3f7c39ec7ceba9cbeb213cbfdf, 2021)
- Version: 1.2.2 (b0a860891dcd4c0c2d7c6149e5cccb6eb881cc21, 2022)
- License: BSD-3-Clause
Files extracted from upstream source:
@ -317,10 +317,6 @@ Files extracted from upstream source:
- `src/*` except from: `.am`, `.rc` and `.in` files
- `AUTHORS`, `COPYING`, `PATENTS`
Important: The files `utils/bit_reader_utils.{c,h}` have Godot-made
changes to ensure they build for Javascript/HTML5. Those
changes are marked with `// -- GODOT --` comments.
## mbedtls

View file

@ -32,6 +32,7 @@ Contributors:
- Pascal Massimino (pascal dot massimino at gmail dot com)
- Paweł Hajdan, Jr (phajdan dot jr at chromium dot org)
- Pierre Joye (pierre dot php at gmail dot com)
- Roberto Alanis (alanisbaez at google dot com)
- Sam Clegg (sbc at chromium dot org)
- Scott Hancher (seh at google dot com)
- Scott LaVarnway (slavarnway at google dot com)

View file

@ -403,7 +403,7 @@ static const uint8_t kZigzag[16] = {
0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
};
// See section 13-2: http://tools.ietf.org/html/rfc6386#section-13.2
// See section 13-2: https://datatracker.ietf.org/doc/html/rfc6386#section-13.2
static int GetLargeValue(VP8BitReader* const br, const uint8_t* const p) {
int v;
if (!VP8GetBit(br, p[3], "coeffs")) {

View file

@ -32,7 +32,7 @@ extern "C" {
// version numbers
#define DEC_MAJ_VERSION 1
#define DEC_MIN_VERSION 2
#define DEC_REV_VERSION 1
#define DEC_REV_VERSION 2
// YUV-cache parameters. Cache is 32-bytes wide (= one cacheline).
// Constraints are: We need to store one 16x16 block of luma samples (y),

View file

@ -84,7 +84,7 @@ static const uint8_t kCodeToPlane[CODE_TO_PLANE_CODES] = {
// to 256 (green component values) + 24 (length prefix values)
// + color_cache_size (between 0 and 2048).
// All values computed for 8-bit first level lookup with Mark Adler's tool:
// http://www.hdfgroup.org/ftp/lib-external/zlib/zlib-1.2.5/examples/enough.c
// https://github.com/madler/zlib/blob/v1.2.5/examples/enough.c
#define FIXED_TABLE_SIZE (630 * 3 + 410)
static const uint16_t kTableSize[12] = {
FIXED_TABLE_SIZE + 654,

View file

@ -23,6 +23,14 @@
#define NUM_CHANNELS 4
// Channel extraction from a uint32_t representation of a uint8_t RGBA/BGRA
// buffer.
#ifdef WORDS_BIGENDIAN
#define CHANNEL_SHIFT(i) (24 - (i) * 8)
#else
#define CHANNEL_SHIFT(i) ((i) * 8)
#endif
typedef void (*BlendRowFunc)(uint32_t* const, const uint32_t* const, int);
static void BlendPixelRowNonPremult(uint32_t* const src,
const uint32_t* const dst, int num_pixels);
@ -209,35 +217,35 @@ static uint8_t BlendChannelNonPremult(uint32_t src, uint8_t src_a,
const uint8_t dst_channel = (dst >> shift) & 0xff;
const uint32_t blend_unscaled = src_channel * src_a + dst_channel * dst_a;
assert(blend_unscaled < (1ULL << 32) / scale);
return (blend_unscaled * scale) >> 24;
return (blend_unscaled * scale) >> CHANNEL_SHIFT(3);
}
// Blend 'src' over 'dst' assuming they are NOT pre-multiplied by alpha.
static uint32_t BlendPixelNonPremult(uint32_t src, uint32_t dst) {
const uint8_t src_a = (src >> 24) & 0xff;
const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff;
if (src_a == 0) {
return dst;
} else {
const uint8_t dst_a = (dst >> 24) & 0xff;
const uint8_t dst_a = (dst >> CHANNEL_SHIFT(3)) & 0xff;
// This is the approximate integer arithmetic for the actual formula:
// dst_factor_a = (dst_a * (255 - src_a)) / 255.
const uint8_t dst_factor_a = (dst_a * (256 - src_a)) >> 8;
const uint8_t blend_a = src_a + dst_factor_a;
const uint32_t scale = (1UL << 24) / blend_a;
const uint8_t blend_r =
BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 0);
const uint8_t blend_g =
BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 8);
const uint8_t blend_b =
BlendChannelNonPremult(src, src_a, dst, dst_factor_a, scale, 16);
const uint8_t blend_r = BlendChannelNonPremult(
src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(0));
const uint8_t blend_g = BlendChannelNonPremult(
src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(1));
const uint8_t blend_b = BlendChannelNonPremult(
src, src_a, dst, dst_factor_a, scale, CHANNEL_SHIFT(2));
assert(src_a + dst_factor_a < 256);
return (blend_r << 0) |
(blend_g << 8) |
(blend_b << 16) |
((uint32_t)blend_a << 24);
return ((uint32_t)blend_r << CHANNEL_SHIFT(0)) |
((uint32_t)blend_g << CHANNEL_SHIFT(1)) |
((uint32_t)blend_b << CHANNEL_SHIFT(2)) |
((uint32_t)blend_a << CHANNEL_SHIFT(3));
}
}
@ -247,7 +255,7 @@ static void BlendPixelRowNonPremult(uint32_t* const src,
const uint32_t* const dst, int num_pixels) {
int i;
for (i = 0; i < num_pixels; ++i) {
const uint8_t src_alpha = (src[i] >> 24) & 0xff;
const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff;
if (src_alpha != 0xff) {
src[i] = BlendPixelNonPremult(src[i], dst[i]);
}
@ -264,7 +272,7 @@ static WEBP_INLINE uint32_t ChannelwiseMultiply(uint32_t pix, uint32_t scale) {
// Blend 'src' over 'dst' assuming they are pre-multiplied by alpha.
static uint32_t BlendPixelPremult(uint32_t src, uint32_t dst) {
const uint8_t src_a = (src >> 24) & 0xff;
const uint8_t src_a = (src >> CHANNEL_SHIFT(3)) & 0xff;
return src + ChannelwiseMultiply(dst, 256 - src_a);
}
@ -274,7 +282,7 @@ static void BlendPixelRowPremult(uint32_t* const src, const uint32_t* const dst,
int num_pixels) {
int i;
for (i = 0; i < num_pixels; ++i) {
const uint8_t src_alpha = (src[i] >> 24) & 0xff;
const uint8_t src_alpha = (src[i] >> CHANNEL_SHIFT(3)) & 0xff;
if (src_alpha != 0xff) {
src[i] = BlendPixelPremult(src[i], dst[i]);
}

View file

@ -25,7 +25,7 @@
#define DMUX_MAJ_VERSION 1
#define DMUX_MIN_VERSION 2
#define DMUX_REV_VERSION 1
#define DMUX_REV_VERSION 2
typedef struct {
size_t start_; // start location of the data

View file

@ -119,7 +119,12 @@ extern "C" {
#define WEBP_USE_NEON
#endif
#if defined(_MSC_VER) && _MSC_VER >= 1700 && defined(_M_ARM)
// Note: ARM64 is supported in Visual Studio 2017, but requires the direct
// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in
// arm_neon.h.
#if defined(_MSC_VER) && \
((_MSC_VER >= 1700 && defined(_M_ARM)) || \
(_MSC_VER >= 1920 && defined(_M_ARM64)))
#define WEBP_USE_NEON
#define WEBP_USE_INTRINSICS
#endif

View file

@ -9,7 +9,7 @@
//
// ARM NEON version of speed-critical encoding functions.
//
// adapted from libvpx (http://www.webmproject.org/code/)
// adapted from libvpx (https://www.webmproject.org/code/)
#include "src/dsp/dsp.h"

View file

@ -107,63 +107,77 @@ static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
//------------------------------------------------------------------------------
// Predictors
uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top) {
uint32_t VP8LPredictor0_C(const uint32_t* const left,
const uint32_t* const top) {
(void)top;
(void)left;
return ARGB_BLACK;
}
uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top) {
uint32_t VP8LPredictor1_C(const uint32_t* const left,
const uint32_t* const top) {
(void)top;
return left;
return *left;
}
uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top) {
uint32_t VP8LPredictor2_C(const uint32_t* const left,
const uint32_t* const top) {
(void)left;
return top[0];
}
uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top) {
uint32_t VP8LPredictor3_C(const uint32_t* const left,
const uint32_t* const top) {
(void)left;
return top[1];
}
uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top) {
uint32_t VP8LPredictor4_C(const uint32_t* const left,
const uint32_t* const top) {
(void)left;
return top[-1];
}
uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average3(left, top[0], top[1]);
uint32_t VP8LPredictor5_C(const uint32_t* const left,
const uint32_t* const top) {
const uint32_t pred = Average3(*left, top[0], top[1]);
return pred;
}
uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average2(left, top[-1]);
uint32_t VP8LPredictor6_C(const uint32_t* const left,
const uint32_t* const top) {
const uint32_t pred = Average2(*left, top[-1]);
return pred;
}
uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average2(left, top[0]);
uint32_t VP8LPredictor7_C(const uint32_t* const left,
const uint32_t* const top) {
const uint32_t pred = Average2(*left, top[0]);
return pred;
}
uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top) {
uint32_t VP8LPredictor8_C(const uint32_t* const left,
const uint32_t* const top) {
const uint32_t pred = Average2(top[-1], top[0]);
(void)left;
return pred;
}
uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top) {
uint32_t VP8LPredictor9_C(const uint32_t* const left,
const uint32_t* const top) {
const uint32_t pred = Average2(top[0], top[1]);
(void)left;
return pred;
}
uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
uint32_t VP8LPredictor10_C(const uint32_t* const left,
const uint32_t* const top) {
const uint32_t pred = Average4(*left, top[-1], top[0], top[1]);
return pred;
}
uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Select(top[0], left, top[-1]);
uint32_t VP8LPredictor11_C(const uint32_t* const left,
const uint32_t* const top) {
const uint32_t pred = Select(top[0], *left, top[-1]);
return pred;
}
uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
uint32_t VP8LPredictor12_C(const uint32_t* const left,
const uint32_t* const top) {
const uint32_t pred = ClampedAddSubtractFull(*left, top[0], top[-1]);
return pred;
}
uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top) {
const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
uint32_t VP8LPredictor13_C(const uint32_t* const left,
const uint32_t* const top) {
const uint32_t pred = ClampedAddSubtractHalf(*left, top[0], top[-1]);
return pred;
}

View file

@ -28,23 +28,38 @@ extern "C" {
//------------------------------------------------------------------------------
// Decoding
typedef uint32_t (*VP8LPredictorFunc)(uint32_t left, const uint32_t* const top);
typedef uint32_t (*VP8LPredictorFunc)(const uint32_t* const left,
const uint32_t* const top);
extern VP8LPredictorFunc VP8LPredictors[16];
uint32_t VP8LPredictor0_C(uint32_t left, const uint32_t* const top);
uint32_t VP8LPredictor1_C(uint32_t left, const uint32_t* const top);
uint32_t VP8LPredictor2_C(uint32_t left, const uint32_t* const top);
uint32_t VP8LPredictor3_C(uint32_t left, const uint32_t* const top);
uint32_t VP8LPredictor4_C(uint32_t left, const uint32_t* const top);
uint32_t VP8LPredictor5_C(uint32_t left, const uint32_t* const top);
uint32_t VP8LPredictor6_C(uint32_t left, const uint32_t* const top);
uint32_t VP8LPredictor7_C(uint32_t left, const uint32_t* const top);
uint32_t VP8LPredictor8_C(uint32_t left, const uint32_t* const top);
uint32_t VP8LPredictor9_C(uint32_t left, const uint32_t* const top);
uint32_t VP8LPredictor10_C(uint32_t left, const uint32_t* const top);
uint32_t VP8LPredictor11_C(uint32_t left, const uint32_t* const top);
uint32_t VP8LPredictor12_C(uint32_t left, const uint32_t* const top);
uint32_t VP8LPredictor13_C(uint32_t left, const uint32_t* const top);
uint32_t VP8LPredictor0_C(const uint32_t* const left,
const uint32_t* const top);
uint32_t VP8LPredictor1_C(const uint32_t* const left,
const uint32_t* const top);
uint32_t VP8LPredictor2_C(const uint32_t* const left,
const uint32_t* const top);
uint32_t VP8LPredictor3_C(const uint32_t* const left,
const uint32_t* const top);
uint32_t VP8LPredictor4_C(const uint32_t* const left,
const uint32_t* const top);
uint32_t VP8LPredictor5_C(const uint32_t* const left,
const uint32_t* const top);
uint32_t VP8LPredictor6_C(const uint32_t* const left,
const uint32_t* const top);
uint32_t VP8LPredictor7_C(const uint32_t* const left,
const uint32_t* const top);
uint32_t VP8LPredictor8_C(const uint32_t* const left,
const uint32_t* const top);
uint32_t VP8LPredictor9_C(const uint32_t* const left,
const uint32_t* const top);
uint32_t VP8LPredictor10_C(const uint32_t* const left,
const uint32_t* const top);
uint32_t VP8LPredictor11_C(const uint32_t* const left,
const uint32_t* const top);
uint32_t VP8LPredictor12_C(const uint32_t* const left,
const uint32_t* const top);
uint32_t VP8LPredictor13_C(const uint32_t* const left,
const uint32_t* const top);
// These Add/Sub function expects upper[-1] and out[-1] to be readable.
typedef void (*VP8LPredictorAddSubFunc)(const uint32_t* in,

View file

@ -179,7 +179,7 @@ static void PREDICTOR_ADD(const uint32_t* in, const uint32_t* upper, \
int x; \
assert(upper != NULL); \
for (x = 0; x < num_pixels; ++x) { \
const uint32_t pred = (PREDICTOR)(out[x - 1], upper + x); \
const uint32_t pred = (PREDICTOR)(&out[x - 1], upper + x); \
out[x] = VP8LAddPixels(in[x], pred); \
} \
}

View file

@ -745,7 +745,7 @@ static void PredictorSub##PREDICTOR_I##_C(const uint32_t* in, \
assert(upper != NULL); \
for (x = 0; x < num_pixels; ++x) { \
const uint32_t pred = \
VP8LPredictor##PREDICTOR_I##_C(in[x - 1], upper + x); \
VP8LPredictor##PREDICTOR_I##_C(&in[x - 1], upper + x); \
out[x] = VP8LSubPixels(in[x], pred); \
} \
}

View file

@ -188,46 +188,51 @@ static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
return Average2(Average2(a0, a1), Average2(a2, a3));
}
static uint32_t Predictor5_MIPSdspR2(uint32_t left, const uint32_t* const top) {
return Average3(left, top[0], top[1]);
static uint32_t Predictor5_MIPSdspR2(const uint32_t* const left,
const uint32_t* const top) {
return Average3(*left, top[0], top[1]);
}
static uint32_t Predictor6_MIPSdspR2(uint32_t left, const uint32_t* const top) {
return Average2(left, top[-1]);
static uint32_t Predictor6_MIPSdspR2(const uint32_t* const left,
const uint32_t* const top) {
return Average2(*left, top[-1]);
}
static uint32_t Predictor7_MIPSdspR2(uint32_t left, const uint32_t* const top) {
return Average2(left, top[0]);
static uint32_t Predictor7_MIPSdspR2(const uint32_t* const left,
const uint32_t* const top) {
return Average2(*left, top[0]);
}
static uint32_t Predictor8_MIPSdspR2(uint32_t left, const uint32_t* const top) {
static uint32_t Predictor8_MIPSdspR2(const uint32_t* const left,
const uint32_t* const top) {
(void)left;
return Average2(top[-1], top[0]);
}
static uint32_t Predictor9_MIPSdspR2(uint32_t left, const uint32_t* const top) {
static uint32_t Predictor9_MIPSdspR2(const uint32_t* const left,
const uint32_t* const top) {
(void)left;
return Average2(top[0], top[1]);
}
static uint32_t Predictor10_MIPSdspR2(uint32_t left,
static uint32_t Predictor10_MIPSdspR2(const uint32_t* const left,
const uint32_t* const top) {
return Average4(left, top[-1], top[0], top[1]);
return Average4(*left, top[-1], top[0], top[1]);
}
static uint32_t Predictor11_MIPSdspR2(uint32_t left,
static uint32_t Predictor11_MIPSdspR2(const uint32_t* const left,
const uint32_t* const top) {
return Select(top[0], left, top[-1]);
return Select(top[0], *left, top[-1]);
}
static uint32_t Predictor12_MIPSdspR2(uint32_t left,
static uint32_t Predictor12_MIPSdspR2(const uint32_t* const left,
const uint32_t* const top) {
return ClampedAddSubtractFull(left, top[0], top[-1]);
return ClampedAddSubtractFull(*left, top[0], top[-1]);
}
static uint32_t Predictor13_MIPSdspR2(uint32_t left,
static uint32_t Predictor13_MIPSdspR2(const uint32_t* const left,
const uint32_t* const top) {
return ClampedAddSubtractHalf(left, top[0], top[-1]);
return ClampedAddSubtractHalf(*left, top[0], top[-1]);
}
// Add green to blue and red channels (i.e. perform the inverse transform of

View file

@ -188,17 +188,21 @@ static WEBP_INLINE uint32_t Average3_NEON(uint32_t a0, uint32_t a1,
return avg;
}
static uint32_t Predictor5_NEON(uint32_t left, const uint32_t* const top) {
return Average3_NEON(left, top[0], top[1]);
static uint32_t Predictor5_NEON(const uint32_t* const left,
const uint32_t* const top) {
return Average3_NEON(*left, top[0], top[1]);
}
static uint32_t Predictor6_NEON(uint32_t left, const uint32_t* const top) {
return Average2_NEON(left, top[-1]);
static uint32_t Predictor6_NEON(const uint32_t* const left,
const uint32_t* const top) {
return Average2_NEON(*left, top[-1]);
}
static uint32_t Predictor7_NEON(uint32_t left, const uint32_t* const top) {
return Average2_NEON(left, top[0]);
static uint32_t Predictor7_NEON(const uint32_t* const left,
const uint32_t* const top) {
return Average2_NEON(*left, top[0]);
}
static uint32_t Predictor13_NEON(uint32_t left, const uint32_t* const top) {
return ClampedAddSubtractHalf_NEON(left, top[0], top[-1]);
static uint32_t Predictor13_NEON(const uint32_t* const left,
const uint32_t* const top) {
return ClampedAddSubtractHalf_NEON(*left, top[0], top[-1]);
}
// Batch versions of those functions.

View file

@ -138,42 +138,51 @@ static WEBP_INLINE uint32_t Average4_SSE2(uint32_t a0, uint32_t a1,
return output;
}
static uint32_t Predictor5_SSE2(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average3_SSE2(left, top[0], top[1]);
static uint32_t Predictor5_SSE2(const uint32_t* const left,
const uint32_t* const top) {
const uint32_t pred = Average3_SSE2(*left, top[0], top[1]);
return pred;
}
static uint32_t Predictor6_SSE2(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average2_SSE2(left, top[-1]);
static uint32_t Predictor6_SSE2(const uint32_t* const left,
const uint32_t* const top) {
const uint32_t pred = Average2_SSE2(*left, top[-1]);
return pred;
}
static uint32_t Predictor7_SSE2(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average2_SSE2(left, top[0]);
static uint32_t Predictor7_SSE2(const uint32_t* const left,
const uint32_t* const top) {
const uint32_t pred = Average2_SSE2(*left, top[0]);
return pred;
}
static uint32_t Predictor8_SSE2(uint32_t left, const uint32_t* const top) {
static uint32_t Predictor8_SSE2(const uint32_t* const left,
const uint32_t* const top) {
const uint32_t pred = Average2_SSE2(top[-1], top[0]);
(void)left;
return pred;
}
static uint32_t Predictor9_SSE2(uint32_t left, const uint32_t* const top) {
static uint32_t Predictor9_SSE2(const uint32_t* const left,
const uint32_t* const top) {
const uint32_t pred = Average2_SSE2(top[0], top[1]);
(void)left;
return pred;
}
static uint32_t Predictor10_SSE2(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Average4_SSE2(left, top[-1], top[0], top[1]);
static uint32_t Predictor10_SSE2(const uint32_t* const left,
const uint32_t* const top) {
const uint32_t pred = Average4_SSE2(*left, top[-1], top[0], top[1]);
return pred;
}
static uint32_t Predictor11_SSE2(uint32_t left, const uint32_t* const top) {
const uint32_t pred = Select_SSE2(top[0], left, top[-1]);
static uint32_t Predictor11_SSE2(const uint32_t* const left,
const uint32_t* const top) {
const uint32_t pred = Select_SSE2(top[0], *left, top[-1]);
return pred;
}
static uint32_t Predictor12_SSE2(uint32_t left, const uint32_t* const top) {
const uint32_t pred = ClampedAddSubtractFull_SSE2(left, top[0], top[-1]);
static uint32_t Predictor12_SSE2(const uint32_t* const left,
const uint32_t* const top) {
const uint32_t pred = ClampedAddSubtractFull_SSE2(*left, top[0], top[-1]);
return pred;
}
static uint32_t Predictor13_SSE2(uint32_t left, const uint32_t* const top) {
const uint32_t pred = ClampedAddSubtractHalf_SSE2(left, top[0], top[-1]);
static uint32_t Predictor13_SSE2(const uint32_t* const left,
const uint32_t* const top) {
const uint32_t pred = ClampedAddSubtractHalf_SSE2(*left, top[0], top[-1]);
return pred;
}

View file

@ -14,6 +14,10 @@
#ifndef WEBP_DSP_MSA_MACRO_H_
#define WEBP_DSP_MSA_MACRO_H_
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_MSA)
#include <stdint.h>
#include <msa.h>
@ -1389,4 +1393,5 @@ static WEBP_INLINE uint32_t func_hadd_uh_u32(v8u16 in) {
} while (0)
#define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__)
#endif // WEBP_USE_MSA
#endif // WEBP_DSP_MSA_MACRO_H_

View file

@ -12,10 +12,12 @@
#ifndef WEBP_DSP_NEON_H_
#define WEBP_DSP_NEON_H_
#include <arm_neon.h>
#include "src/dsp/dsp.h"
#if defined(WEBP_USE_NEON)
#include <arm_neon.h>
// Right now, some intrinsics functions seem slower, so we disable them
// everywhere except newer clang/gcc or aarch64 where the inline assembly is
// incompatible.
@ -98,4 +100,5 @@ static WEBP_INLINE int32x4x4_t Transpose4x4_NEON(const int32x4x4_t rows) {
} while (0)
#endif
#endif // WEBP_USE_NEON
#endif // WEBP_DSP_NEON_H_

View file

@ -10,7 +10,7 @@
// inline YUV<->RGB conversion function
//
// The exact naming is Y'CbCr, following the ITU-R BT.601 standard.
// More information at: http://en.wikipedia.org/wiki/YCbCr
// More information at: https://en.wikipedia.org/wiki/YCbCr
// Y = 0.2569 * R + 0.5044 * G + 0.0979 * B + 16
// U = -0.1483 * R - 0.2911 * G + 0.4394 * B + 128
// V = 0.4394 * R - 0.3679 * G - 0.0715 * B + 128

View file

@ -778,6 +778,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
// Roughly refresh the proba eight times per pass
int max_count = (enc->mb_w_ * enc->mb_h_) >> 3;
int num_pass_left = enc->config_->pass;
int remaining_progress = 40; // percents
const int do_search = enc->do_search_;
VP8EncIterator it;
VP8EncProba* const proba = &enc->proba_;
@ -805,6 +806,9 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
uint64_t size_p0 = 0;
uint64_t distortion = 0;
int cnt = max_count;
// The final number of passes is not trivial to know in advance.
const int pass_progress = remaining_progress / (2 + num_pass_left);
remaining_progress -= pass_progress;
VP8IteratorInit(enc, &it);
SetLoopParams(enc, stats.q);
if (is_last_pass) {
@ -832,7 +836,7 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
StoreSideInfo(&it);
VP8StoreFilterStats(&it);
VP8IteratorExport(&it);
ok = VP8IteratorProgress(&it, 20);
ok = VP8IteratorProgress(&it, pass_progress);
}
VP8IteratorSaveBoundary(&it);
} while (ok && VP8IteratorNext(&it));
@ -878,7 +882,8 @@ int VP8EncTokenLoop(VP8Encoder* const enc) {
ok = VP8EmitTokens(&enc->tokens_, enc->parts_ + 0,
(const uint8_t*)proba->coeffs_, 1);
}
ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + 20, &enc->percent_);
ok = ok && WebPReportProgress(enc->pic_, enc->percent_ + remaining_progress,
&enc->percent_);
return PostLoopFinalize(&it, ok);
}

View file

@ -249,7 +249,7 @@ static WEBP_INLINE void GetResidual(
} else if (x == 0) {
predict = upper_row[x]; // Top.
} else {
predict = pred_func(current_row[x - 1], upper_row + x);
predict = pred_func(&current_row[x - 1], upper_row + x);
}
#if (WEBP_NEAR_LOSSLESS == 1)
if (max_quantization == 1 || mode == 0 || y == 0 || y == height - 1 ||

View file

@ -585,6 +585,9 @@ static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate,
return rate * lambda + RD_DISTO_MULT * distortion;
}
// Coefficient type.
enum { TYPE_I16_AC = 0, TYPE_I16_DC = 1, TYPE_CHROMA_A = 2, TYPE_I4_AC = 3 };
static int TrellisQuantizeBlock(const VP8Encoder* const enc,
int16_t in[16], int16_t out[16],
int ctx0, int coeff_type,
@ -593,7 +596,7 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type];
CostArrayPtr const costs =
(CostArrayPtr)enc->proba_.remapped_costs_[coeff_type];
const int first = (coeff_type == 0) ? 1 : 0;
const int first = (coeff_type == TYPE_I16_AC) ? 1 : 0;
Node nodes[16][NUM_NODES];
ScoreState score_states[2][NUM_NODES];
ScoreState* ss_cur = &SCORE_STATE(0, MIN_DELTA);
@ -657,16 +660,17 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
// test all alternate level values around level0.
for (m = -MIN_DELTA; m <= MAX_DELTA; ++m) {
Node* const cur = &NODE(n, m);
int level = level0 + m;
const int level = level0 + m;
const int ctx = (level > 2) ? 2 : level;
const int band = VP8EncBands[n + 1];
score_t base_score;
score_t best_cur_score = MAX_COST;
int best_prev = 0; // default, in case
score_t best_cur_score;
int best_prev;
score_t cost, score;
ss_cur[m].score = MAX_COST;
ss_cur[m].costs = costs[n + 1][ctx];
if (level < 0 || level > thresh_level) {
ss_cur[m].score = MAX_COST;
// Node is dead.
continue;
}
@ -682,18 +686,24 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
}
// Inspect all possible non-dead predecessors. Retain only the best one.
for (p = -MIN_DELTA; p <= MAX_DELTA; ++p) {
// The base_score is added to all scores so it is only added for the final
// value after the loop.
cost = VP8LevelCost(ss_prev[-MIN_DELTA].costs, level);
best_cur_score =
ss_prev[-MIN_DELTA].score + RDScoreTrellis(lambda, cost, 0);
best_prev = -MIN_DELTA;
for (p = -MIN_DELTA + 1; p <= MAX_DELTA; ++p) {
// Dead nodes (with ss_prev[p].score >= MAX_COST) are automatically
// eliminated since their score can't be better than the current best.
const score_t cost = VP8LevelCost(ss_prev[p].costs, level);
cost = VP8LevelCost(ss_prev[p].costs, level);
// Examine node assuming it's a non-terminal one.
const score_t score =
base_score + ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);
score = ss_prev[p].score + RDScoreTrellis(lambda, cost, 0);
if (score < best_cur_score) {
best_cur_score = score;
best_prev = p;
}
}
best_cur_score += base_score;
// Store best finding in current node.
cur->sign = sign;
cur->level = level;
@ -701,11 +711,11 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
ss_cur[m].score = best_cur_score;
// Now, record best terminal node (and thus best entry in the graph).
if (level != 0) {
if (level != 0 && best_cur_score < best_score) {
const score_t last_pos_cost =
(n < 15) ? VP8BitCost(0, probas[band][ctx][0]) : 0;
const score_t last_pos_score = RDScoreTrellis(lambda, last_pos_cost, 0);
const score_t score = best_cur_score + last_pos_score;
score = best_cur_score + last_pos_score;
if (score < best_score) {
best_score = score;
best_path[0] = n; // best eob position
@ -717,10 +727,16 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc,
}
// Fresh start
memset(in + first, 0, (16 - first) * sizeof(*in));
memset(out + first, 0, (16 - first) * sizeof(*out));
// Beware! We must preserve in[0]/out[0] value for TYPE_I16_AC case.
if (coeff_type == TYPE_I16_AC) {
memset(in + 1, 0, 15 * sizeof(*in));
memset(out + 1, 0, 15 * sizeof(*out));
} else {
memset(in, 0, 16 * sizeof(*in));
memset(out, 0, 16 * sizeof(*out));
}
if (best_path[0] == -1) {
return 0; // skip!
return 0; // skip!
}
{
@ -775,9 +791,9 @@ static int ReconstructIntra16(VP8EncIterator* const it,
for (y = 0, n = 0; y < 4; ++y) {
for (x = 0; x < 4; ++x, ++n) {
const int ctx = it->top_nz_[x] + it->left_nz_[y];
const int non_zero =
TrellisQuantizeBlock(enc, tmp[n], rd->y_ac_levels[n], ctx, 0,
&dqm->y1_, dqm->lambda_trellis_i16_);
const int non_zero = TrellisQuantizeBlock(
enc, tmp[n], rd->y_ac_levels[n], ctx, TYPE_I16_AC, &dqm->y1_,
dqm->lambda_trellis_i16_);
it->top_nz_[x] = it->left_nz_[y] = non_zero;
rd->y_ac_levels[n][0] = 0;
nz |= non_zero << n;
@ -818,7 +834,7 @@ static int ReconstructIntra4(VP8EncIterator* const it,
if (DO_TRELLIS_I4 && it->do_trellis_) {
const int x = it->i4_ & 3, y = it->i4_ >> 2;
const int ctx = it->top_nz_[x] + it->left_nz_[y];
nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, 3, &dqm->y1_,
nz = TrellisQuantizeBlock(enc, tmp, levels, ctx, TYPE_I4_AC, &dqm->y1_,
dqm->lambda_trellis_i4_);
} else {
nz = VP8EncQuantizeBlock(tmp, levels, &dqm->y1_);
@ -927,9 +943,9 @@ static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd,
for (y = 0; y < 2; ++y) {
for (x = 0; x < 2; ++x, ++n) {
const int ctx = it->top_nz_[4 + ch + x] + it->left_nz_[4 + ch + y];
const int non_zero =
TrellisQuantizeBlock(enc, tmp[n], rd->uv_levels[n], ctx, 2,
&dqm->uv_, dqm->lambda_trellis_uv_);
const int non_zero = TrellisQuantizeBlock(
enc, tmp[n], rd->uv_levels[n], ctx, TYPE_CHROMA_A, &dqm->uv_,
dqm->lambda_trellis_uv_);
it->top_nz_[4 + ch + x] = it->left_nz_[4 + ch + y] = non_zero;
nz |= non_zero << n;
}

View file

@ -32,7 +32,7 @@ extern "C" {
// version numbers
#define ENC_MAJ_VERSION 1
#define ENC_MIN_VERSION 2
#define ENC_REV_VERSION 1
#define ENC_REV_VERSION 2
enum { MAX_LF_LEVELS = 64, // Maximum loop filter level
MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost

View file

@ -29,7 +29,7 @@ extern "C" {
#define MUX_MAJ_VERSION 1
#define MUX_MIN_VERSION 2
#define MUX_REV_VERSION 1
#define MUX_REV_VERSION 2
// Chunk object.
typedef struct WebPChunk WebPChunk;

View file

@ -161,7 +161,7 @@ static void SetBitDepths(const HuffmanTree* const tree,
// especially when population counts are longer than 2**tree_limit, but
// we are not planning to use this with extremely long blocks.
//
// See http://en.wikipedia.org/wiki/Huffman_coding
// See https://en.wikipedia.org/wiki/Huffman_coding
static void GenerateOptimalTree(const uint32_t* const histogram,
int histogram_size,
HuffmanTree* tree, int tree_depth_limit,

View file

@ -30,7 +30,7 @@
#define DFIX 4 // extra precision for ordered dithering
#define DSIZE 4 // dithering size (must be a power of two)
// cf. http://en.wikipedia.org/wiki/Ordered_dithering
// cf. https://en.wikipedia.org/wiki/Ordered_dithering
static const uint8_t kOrderedDither[DSIZE][DSIZE] = {
{ 0, 8, 2, 10 }, // coefficients are in DFIX fixed-point precision
{ 12, 4, 14, 6 },

View file

@ -23,7 +23,7 @@
// alloc/free etc) is printed. For debugging/tuning purpose only (it's slow,
// and not multi-thread safe!).
// An interesting alternative is valgrind's 'massif' tool:
// http://valgrind.org/docs/manual/ms-manual.html
// https://valgrind.org/docs/manual/ms-manual.html
// Here is an example command line:
/* valgrind --tool=massif --massif-out-file=massif.out \
--stacks=yes --alloc-fn=WebPSafeMalloc --alloc-fn=WebPSafeCalloc

View file

@ -85,7 +85,7 @@ WEBP_EXTERN uint8_t* WebPDecodeBGR(const uint8_t* data, size_t data_size,
// Upon return, the Y buffer has a stride returned as '*stride', while U and V
// have a common stride returned as '*uv_stride'.
// Return NULL in case of error.
// (*) Also named Y'CbCr. See: http://en.wikipedia.org/wiki/YCbCr
// (*) Also named Y'CbCr. See: https://en.wikipedia.org/wiki/YCbCr
WEBP_EXTERN uint8_t* WebPDecodeYUV(const uint8_t* data, size_t data_size,
int* width, int* height,
uint8_t** u, uint8_t** v,