From 284af7be96e4a5a53eb5215688bad2326f785d38 Mon Sep 17 00:00:00 2001 From: Aaron Franke Date: Mon, 11 Jul 2022 17:13:38 -0500 Subject: [PATCH 01/17] Allow exporting PCK files without export templates installed (cherry picked from commit 8d81ce9561748b9a231af70323ddffd288e189ab) --- editor/project_export.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/editor/project_export.cpp b/editor/project_export.cpp index d4d56047e58..4aea080d330 100644 --- a/editor/project_export.cpp +++ b/editor/project_export.cpp @@ -222,6 +222,7 @@ void ProjectExportDialog::_edit_preset(int p_index) { export_path->show(); duplicate_preset->set_disabled(false); delete_preset->set_disabled(false); + get_ok()->set_disabled(false); name->set_text(current->get_name()); List extension_list = current->get_platform()->get_binary_extensions(current); @@ -267,8 +268,6 @@ void ProjectExportDialog::_edit_preset(int p_index) { } export_button->set_disabled(true); - get_ok()->set_disabled(true); - } else { if (error != String()) { Vector items = error.split("\n", false); @@ -287,7 +286,6 @@ void ProjectExportDialog::_edit_preset(int p_index) { export_error->hide(); export_templates_error->hide(); export_button->set_disabled(false); - get_ok()->set_disabled(false); } custom_features->set_text(current->get_custom_features()); From 637330b86674f7356c80189d815c25808ff2ddf7 Mon Sep 17 00:00:00 2001 From: Hugo Locurcio Date: Mon, 8 Aug 2022 23:06:54 +0200 Subject: [PATCH 02/17] Improve InstancePlaceholder documentation (cherry picked from commit 5acf30b5380d71472657fbba84cad8739a5d7c1e) --- doc/classes/InstancePlaceholder.xml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/doc/classes/InstancePlaceholder.xml b/doc/classes/InstancePlaceholder.xml index d4bea11139e..a9cc341e6b5 100644 --- a/doc/classes/InstancePlaceholder.xml +++ b/doc/classes/InstancePlaceholder.xml @@ -15,7 +15,8 @@ - Not thread-safe. Use [method Object.call_deferred] if calling from a thread. + Call this method to actually load in the node. The created node will be placed as a sibling [i]above[/i] the [InstancePlaceholder] in the scene tree. The [Node]'s reference is also returned for convenience. + [b]Note:[/b] [method create_instance] is not thread-safe. Use [method Object.call_deferred] if calling from a thread. @@ -28,6 +29,8 @@ + Returns the list of properties that will be applied to the node when [method create_instance] is called. + If [code]with_order[/code] is [code]true[/code], a key named [code].order[/code] (note the leading period) is added to the dictionary. This [code].order[/code] key is an [Array] of [String] property names specifying the order in which properties will be applied (with index 0 being the first). From ddb568731a92e3891f27e06b1123a43b4ca47691 Mon Sep 17 00:00:00 2001 From: Marlin Watts-Woods Date: Mon, 8 Aug 2022 21:08:26 -0700 Subject: [PATCH 03/17] Added to Sprite3D Documentation (cherry picked from commit f8f665b03e2c2b2a10ffcaf815c511cd31a70887) --- doc/classes/Sprite3D.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/classes/Sprite3D.xml b/doc/classes/Sprite3D.xml index 10cdd458490..6293ba3d4eb 100644 --- a/doc/classes/Sprite3D.xml +++ b/doc/classes/Sprite3D.xml @@ -27,7 +27,7 @@ The region of the atlas texture to display. [member region_enabled] must be [code]true[/code]. - [Texture] object to draw. If [member GeometryInstance.material_override] is used, this will be overridden. + [Texture] object to draw. If [member GeometryInstance.material_override] is used, this will be overridden. The size information is still used. The number of rows in the sprite sheet. From 09fb05bc05d924f86b4ad89156b161a0b5a02bf0 Mon Sep 17 00:00:00 2001 From: Dan Boorstein Date: Tue, 9 Aug 2022 16:31:12 -0800 Subject: [PATCH 04/17] Add note about using AudioListener2D There was no mention of the effect of AudioListener2D in this documentation, making it unclear if there was a relationship. The new text is copied and modified from the AudioStreamPlayer3D documentation. Use 'an' as article before 'AudioListener3D' (cherry picked from commit a7ebc6fb18bcbcf817895955eed619588b3cd8b8) --- doc/classes/AudioStreamPlayer2D.xml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/classes/AudioStreamPlayer2D.xml b/doc/classes/AudioStreamPlayer2D.xml index 2ffaeb7f2e5..72f1a949a70 100644 --- a/doc/classes/AudioStreamPlayer2D.xml +++ b/doc/classes/AudioStreamPlayer2D.xml @@ -4,7 +4,8 @@ Plays positional sound in 2D space. - Plays audio that dampens with distance from screen center. + Plays audio that dampens with distance from a given position. + By default, audio is heard from the screen center. This can be changed by adding a [Listener2D] node to the scene and enabling it by calling [method Listener2D.make_current] on it. See also [AudioStreamPlayer] to play a sound non-positionally. [b]Note:[/b] Hiding an [AudioStreamPlayer2D] node does not disable its audio output. To temporarily disable an [AudioStreamPlayer2D]'s audio output, set [member volume_db] to a very low value like [code]-100[/code] (which isn't audible to human hearing). From b808923b5412cfe7e5f8791f81e4c40dcaf320bd Mon Sep 17 00:00:00 2001 From: Michael Alexsander Date: Tue, 9 Aug 2022 19:04:40 -0300 Subject: [PATCH 05/17] Fix error when switching to another `GridMap` with an item with higher index selected (cherry picked from commit fe16aecbacaef37b8a56de34c9cb5a69f7a26644) --- modules/gridmap/grid_map_editor_plugin.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/gridmap/grid_map_editor_plugin.cpp b/modules/gridmap/grid_map_editor_plugin.cpp index 77818d737d4..20bcefbe573 100644 --- a/modules/gridmap/grid_map_editor_plugin.cpp +++ b/modules/gridmap/grid_map_editor_plugin.cpp @@ -904,10 +904,12 @@ void GridMapEditor::update_palette() { } if (selected != -1 && mesh_library_palette->get_item_count() > 0) { - mesh_library_palette->select(selected); + // Make sure that this variable is set correctly. + selected_palette = MIN(selected, mesh_library_palette->get_item_count() - 1); + mesh_library_palette->select(selected_palette); } - last_mesh_library = mesh_library.operator->(); + last_mesh_library = *mesh_library; } void GridMapEditor::edit(GridMap *p_gridmap) { From 0876491983209c9170cc31a5900b606c1c91dbc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20J=2E=20Est=C3=A9banez?= Date: Wed, 10 Aug 2022 21:42:49 +0200 Subject: [PATCH 06/17] Clean iOS platform config of long gone macro (cherry picked from commit 8c2a577350924d1f4a8e0a98ebf85bea96ea7e63) --- platform/iphone/platform_config.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/platform/iphone/platform_config.h b/platform/iphone/platform_config.h index a1b598fe2ab..0f3acb258f5 100644 --- a/platform/iphone/platform_config.h +++ b/platform/iphone/platform_config.h @@ -33,8 +33,6 @@ #define GLES2_INCLUDE_H #define GLES3_INCLUDE_H -#define PLATFORM_REFCOUNT - #define PTHREAD_RENAME_SELF #define _weakify(var) __weak typeof(var) GDWeak_##var = var; From 457a985d701a6390d47317c9c6c7ab31603aa0ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pedro=20J=2E=20Est=C3=A9banez?= Date: Fri, 12 Aug 2022 08:01:56 +0200 Subject: [PATCH 07/17] Fix jumbled error output when using Windows spawn fix (cherry picked from commit 4a3cb1447351c2e915f2beb8c34780d2c6fe3e43) --- methods.py | 1 + 1 file changed, 1 insertion(+) diff --git a/methods.py b/methods.py index efcfdbe15a0..91135a1b910 100644 --- a/methods.py +++ b/methods.py @@ -333,6 +333,7 @@ def use_windows_spawn_fix(self, platform=None): startupinfo=startupinfo, shell=False, env=env, + text=True, ) _, err = proc.communicate() rv = proc.wait() From a0fc7e80d9c078f8335f77f32c8dabcde7555eb0 Mon Sep 17 00:00:00 2001 From: kobewi Date: Wed, 17 Aug 2022 02:28:13 +0200 Subject: [PATCH 08/17] Mention the stretching behavior of PopupPanel (cherry picked from commit a2b088b1c1b79950212287fd4c8539a67cfcb56f) --- doc/classes/PopupPanel.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/classes/PopupPanel.xml b/doc/classes/PopupPanel.xml index 76f0c8ad3c4..6afcc9ca859 100644 --- a/doc/classes/PopupPanel.xml +++ b/doc/classes/PopupPanel.xml @@ -5,6 +5,7 @@ Class for displaying popups with a panel background. In some cases it might be simpler to use than [Popup], since it provides a configurable background. If you are making windows, better check [WindowDialog]. + If any [Control] node is added as a child of this [PopupPanel], it will be stretched to fit the panel's size (similar to how [PanelContainer] works). From cfa9b772dadd2d2e070b74bc18ff7127274c5139 Mon Sep 17 00:00:00 2001 From: Micky Date: Sat, 13 Aug 2022 00:30:40 +0200 Subject: [PATCH 09/17] Improve documentation of CanvasItem's draw logic (cherry picked from commit 953c78eaff2fc63e18c5da66c809c7e2b7666b11) --- doc/classes/CanvasItem.xml | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/doc/classes/CanvasItem.xml b/doc/classes/CanvasItem.xml index b516100127a..dc8ef419656 100644 --- a/doc/classes/CanvasItem.xml +++ b/doc/classes/CanvasItem.xml @@ -5,7 +5,7 @@ Base class of anything 2D. Canvas items are laid out in a tree; children inherit and extend their parent's transform. [CanvasItem] is extended by [Control] for anything GUI-related, and by [Node2D] for anything related to the 2D engine. - Any [CanvasItem] can draw. For this, [method update] must be called, then [constant NOTIFICATION_DRAW] will be received on idle time to request redraw. Because of this, canvas items don't need to be redrawn on every frame, improving the performance significantly. Several functions for drawing on the [CanvasItem] are provided (see [code]draw_*[/code] functions). However, they can only be used inside the [method Object._notification], signal or [method _draw] virtual functions. + Any [CanvasItem] can draw. For this, [method update] is called by the engine, then [constant NOTIFICATION_DRAW] will be received on idle time to request redraw. Because of this, canvas items don't need to be redrawn on every frame, improving the performance significantly. Several functions for drawing on the [CanvasItem] are provided (see [code]draw_*[/code] functions). However, they can only be used inside [method _draw], its corresponding [method Object._notification] or methods connected to the [signal draw] signal. Canvas items are drawn in tree order. By default, children are on top of their parents so a root [CanvasItem] will be drawn behind everything. This behavior can be changed on a per-item basis. A [CanvasItem] can also be hidden, which will also hide its children. It provides many ways to change parameters such as modulation (for itself and its children) and self modulation (only for itself), as well as its blend mode. Ultimately, a transform notification can be requested, which will notify the node that its global position changed in case the parent tree changed. @@ -20,7 +20,8 @@ - Overridable function called by the engine (if defined) to draw the canvas item. + Called when [CanvasItem] has been requested to redraw (when [method update] is called, either manually or by the engine). + Corresponds to the [constant NOTIFICATION_DRAW] notification in [method Object._notification]. @@ -366,7 +367,7 @@ - Returns [code]true[/code] if the node is present in the [SceneTree], its [member visible] property is [code]true[/code] and all its antecedents are also visible. If any antecedent is hidden, this node will not be visible in the scene tree. + Returns [code]true[/code] if the node is present in the [SceneTree], its [member visible] property is [code]true[/code] and all its antecedents are also visible. If any antecedent is hidden, this node will not be visible in the scene tree, and is consequently not drawn (see [method _draw]). @@ -413,7 +414,7 @@ - Queue the [CanvasItem] for update. [constant NOTIFICATION_DRAW] will be called on idle time to request redraw. + Queues the [CanvasItem] to redraw. During idle time, if [CanvasItem] is visible, [constant NOTIFICATION_DRAW] is sent and [method _draw] is called. This only occurs [b]once[/b] per frame, even if this method has been called multiple times. @@ -447,7 +448,8 @@ - Emitted when the [CanvasItem] must redraw. This can only be connected realtime, as deferred will not allow drawing. + Emitted when the [CanvasItem] must redraw, [i]after[/i] the related [constant NOTIFICATION_DRAW] notification, and [i]before[/i] [method _draw] is called. + [b]Note:[/b] Deferred connections do not allow drawing through the [code]draw_*[/code] methods. @@ -492,7 +494,7 @@ The [CanvasItem]'s local transform has changed. This notification is only received if enabled by [method set_notify_local_transform]. - The [CanvasItem] is requested to draw. + The [CanvasItem] is requested to draw (see [method _draw]). The [CanvasItem]'s visibility has changed. From c8810395a2a94090bc28e32d0231cc00d722dc60 Mon Sep 17 00:00:00 2001 From: bitsawer Date: Mon, 15 Aug 2022 20:27:29 +0300 Subject: [PATCH 10/17] Fix Windows list dir handle leak (cherry picked from commit 40325006b6c08c32b3db9dfd901e54d44b9d4dc3) --- drivers/windows/dir_access_windows.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/windows/dir_access_windows.cpp b/drivers/windows/dir_access_windows.cpp index 8de5908c122..f9656f28012 100644 --- a/drivers/windows/dir_access_windows.cpp +++ b/drivers/windows/dir_access_windows.cpp @@ -403,6 +403,8 @@ DirAccessWindows::DirAccessWindows() { } DirAccessWindows::~DirAccessWindows() { + list_dir_end(); + memdelete(p); } From 562d7c616d9e61213fe69a0eac72cec80a1d9519 Mon Sep 17 00:00:00 2001 From: Jamie Pate Date: Mon, 15 Aug 2022 21:12:19 -0700 Subject: [PATCH 11/17] Improve performance of screen_get_dpi() in Javascript Replace a bisect with a single multiplication when calling screen_get_dpi() in Javascript Tested the value of window.matchMedia(`(resolution:${(window.devicePixelRatio*96).toFixed(100)}dpi)`).matches which is true except for values that cause a lot of rounding errors (e.g. dpr : 0.3 => resolution: 28.799999999999997dpi) Even in these cases the value matches the result of the previous `findDPI()` method. See also: https://github.com/godotengine/godot/commit/6cff589b5bd483b563fe465bde74ca94902aab41#r81273660 (cherry picked from commit c2e3971013b49c68bfa590b3b491d606b4f3adfb) --- .../js/libs/library_godot_display.js | 28 +++++-------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/platform/javascript/js/libs/library_godot_display.js b/platform/javascript/js/libs/library_godot_display.js index cfa0b3fd475..b16c59b1d74 100644 --- a/platform/javascript/js/libs/library_godot_display.js +++ b/platform/javascript/js/libs/library_godot_display.js @@ -335,26 +335,12 @@ const GodotDisplay = { $GodotDisplay__deps: ['$GodotConfig', '$GodotRuntime', '$GodotDisplayCursor', '$GodotEventListeners', '$GodotDisplayScreen', '$GodotDisplayVK'], $GodotDisplay: { window_icon: '', - findDPI: function () { - function testDPI(dpi) { - return window.matchMedia(`(max-resolution: ${dpi}dpi)`).matches; - } - function bisect(low, high, func) { - const mid = parseInt(((high - low) / 2) + low, 10); - if (high - low <= 1) { - return func(high) ? high : low; - } - if (func(mid)) { - return bisect(low, mid, func); - } - return bisect(mid, high, func); - } - try { - const dpi = bisect(0, 800, testDPI); - return dpi >= 96 ? dpi : 96; - } catch (e) { - return 96; - } + getDPI: function () { + // devicePixelRatio is given in dppx + // https://drafts.csswg.org/css-values/#resolution + // > due to the 1:96 fixed ratio of CSS *in* to CSS *px*, 1dppx is equivalent to 96dpi. + const dpi = Math.round(window.devicePixelRatio * 96); + return dpi >= 96 ? dpi : 96; }, }, @@ -472,7 +458,7 @@ const GodotDisplay = { godot_js_display_screen_dpi_get__sig: 'i', godot_js_display_screen_dpi_get: function () { - return GodotDisplay.findDPI(); + return GodotDisplay.getDPI(); }, godot_js_display_pixel_ratio_get__sig: 'f', From a53264ab41281bded40e8950374dc7a8b37c5fd7 Mon Sep 17 00:00:00 2001 From: Hugo Locurcio Date: Fri, 7 May 2021 22:21:03 +0200 Subject: [PATCH 12/17] Add support for documenting most editor settings in the class reference MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Settings defined in editor plugins are missing (about 100 of them), but all other settings (about 200 of them) can now be documented in the EditorSettings class. Co-authored-by: Rémi Verschelde (cherry picked from commit 63ce655e75e9d46c8b588ff258c3f50d5290c553) --- doc/classes/EditorSettings.xml | 428 +++++++++++++++++++++++++++++++++ editor/doc/doc_data.cpp | 19 +- 2 files changed, 445 insertions(+), 2 deletions(-) diff --git a/doc/classes/EditorSettings.xml b/doc/classes/EditorSettings.xml index 68641c80fcb..957aa13604c 100644 --- a/doc/classes/EditorSettings.xml +++ b/doc/classes/EditorSettings.xml @@ -155,6 +155,434 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/editor/doc/doc_data.cpp b/editor/doc/doc_data.cpp index 4cfed81a8d7..b84c513ebc6 100644 --- a/editor/doc/doc_data.cpp +++ b/editor/doc/doc_data.cpp @@ -39,6 +39,7 @@ #include "core/script_language.h" #include "core/translation.h" #include "core/version.h" +#include "editor/editor_settings.h" #include "scene/resources/theme.h" static String _get_indent(const String &p_text) { @@ -290,8 +291,15 @@ void DocData::generate(bool p_basic_types) { List properties; List own_properties; - if (name == "ProjectSettings") { - //special case for project settings, so settings can be documented + + // Special case for editor and project settings, so they can be documented. + if (name == "EditorSettings") { + // We don't create the full blown EditorSettings (+ config file) with `create()`, + // instead we just make a local instance to get default values. + Ref edset = memnew(EditorSettings); + edset->get_property_list(&properties); + own_properties = properties; + } else if (name == "ProjectSettings") { ProjectSettings::get_singleton()->get_property_list(&properties); own_properties = properties; } else { @@ -326,6 +334,13 @@ void DocData::generate(bool p_basic_types) { bool default_value_valid = false; Variant default_value; + if (name == "EditorSettings") { + if (E->get().name == "resource_local_to_scene" || E->get().name == "resource_name" || E->get().name == "resource_path" || E->get().name == "script") { + // Don't include spurious properties in the generated EditorSettings class reference. + continue; + } + } + if (name == "ProjectSettings") { // Special case for project settings, so that settings are not taken from the current project's settings if (E->get().name == "script" || From 245d70bec0a8bac3bbe5c233cd8e0cde2bbf75b5 Mon Sep 17 00:00:00 2001 From: Michael Alexsander Date: Sun, 21 Aug 2022 22:10:41 -0300 Subject: [PATCH 13/17] Fix `ItemList` selection visual when the scrollbar visibility changes (cherry picked from commit e298144a41338d80b5f0090ce8f28aef888c13dd) --- scene/gui/item_list.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scene/gui/item_list.cpp b/scene/gui/item_list.cpp index 3ed6bdbe4a5..5c512bc05d0 100644 --- a/scene/gui/item_list.cpp +++ b/scene/gui/item_list.cpp @@ -754,11 +754,7 @@ void ItemList::_notification(int p_what) { scroll_bar->set_anchor_and_margin(MARGIN_BOTTOM, ANCHOR_END, -bg->get_margin(MARGIN_BOTTOM)); Size2 size = get_size(); - int width = size.width - bg->get_minimum_size().width; - if (scroll_bar->is_visible()) { - width -= mw; - } draw_style_box(bg, Rect2(Point2(), size)); @@ -917,6 +913,10 @@ void ItemList::_notification(int p_what) { shape_changed = false; } + if (scroll_bar->is_visible()) { + width -= mw; + } + //ensure_selected_visible needs to be checked before we draw the list. if (ensure_selected_visible && current >= 0 && current < items.size()) { Rect2 r = items[current].rect_cache; From 7dd322f83e7a9fbd18767c41eafeda1186fb4317 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Verschelde?= Date: Tue, 23 Aug 2022 16:47:48 +0200 Subject: [PATCH 14/17] libwebp: Sync with upstream 1.2.4 Changes: https://chromium.googlesource.com/webm/libwebp/+/1.2.4/NEWS (cherry picked from commit e5e34f21fc36c8a162bb46e845bb3465cd434150) --- modules/webp/SCsub | 238 ++++---- thirdparty/README.md | 4 +- thirdparty/libwebp/AUTHORS | 4 + thirdparty/libwebp/sharpyuv/sharpyuv.c | 498 +++++++++++++++++ thirdparty/libwebp/sharpyuv/sharpyuv.h | 81 +++ thirdparty/libwebp/sharpyuv/sharpyuv_csp.c | 110 ++++ thirdparty/libwebp/sharpyuv/sharpyuv_csp.h | 59 ++ thirdparty/libwebp/sharpyuv/sharpyuv_dsp.c | 102 ++++ thirdparty/libwebp/sharpyuv/sharpyuv_dsp.h | 29 + thirdparty/libwebp/sharpyuv/sharpyuv_gamma.c | 114 ++++ thirdparty/libwebp/sharpyuv/sharpyuv_gamma.h | 35 ++ thirdparty/libwebp/sharpyuv/sharpyuv_neon.c | 182 ++++++ thirdparty/libwebp/sharpyuv/sharpyuv_sse2.c | 204 +++++++ thirdparty/libwebp/src/dec/vp8i_dec.h | 2 +- thirdparty/libwebp/src/dec/vp8l_dec.c | 6 +- thirdparty/libwebp/src/demux/demux.c | 5 +- .../libwebp/src/dsp/alpha_processing_neon.c | 6 +- thirdparty/libwebp/src/dsp/cpu.c | 2 +- thirdparty/libwebp/src/dsp/cpu.h | 254 +++++++++ thirdparty/libwebp/src/dsp/dsp.h | 229 +------- thirdparty/libwebp/src/dsp/lossless.h | 8 +- thirdparty/libwebp/src/dsp/lossless_enc.c | 12 +- .../libwebp/src/dsp/lossless_enc_mips32.c | 22 +- .../libwebp/src/dsp/lossless_enc_sse2.c | 4 +- thirdparty/libwebp/src/dsp/yuv.c | 64 --- thirdparty/libwebp/src/dsp/yuv_neon.c | 108 ---- thirdparty/libwebp/src/dsp/yuv_sse2.c | 119 ---- thirdparty/libwebp/src/enc/alpha_enc.c | 2 +- .../src/enc/backward_references_cost_enc.c | 77 +-- .../libwebp/src/enc/backward_references_enc.c | 86 ++- .../libwebp/src/enc/backward_references_enc.h | 12 +- thirdparty/libwebp/src/enc/histogram_enc.c | 208 ++++--- thirdparty/libwebp/src/enc/histogram_enc.h | 24 +- thirdparty/libwebp/src/enc/picture_csp_enc.c | 498 +++-------------- thirdparty/libwebp/src/enc/picture_enc.c | 44 +- .../libwebp/src/enc/picture_rescale_enc.c | 72 +-- .../libwebp/src/enc/picture_tools_enc.c | 45 +- thirdparty/libwebp/src/enc/predictor_enc.c | 50 +- thirdparty/libwebp/src/enc/quant_enc.c | 62 +- thirdparty/libwebp/src/enc/vp8i_enc.h | 24 +- thirdparty/libwebp/src/enc/vp8l_enc.c | 529 ++++++++++-------- thirdparty/libwebp/src/enc/vp8li_enc.h | 24 +- thirdparty/libwebp/src/enc/webp_enc.c | 4 +- thirdparty/libwebp/src/mux/muxedit.c | 1 + thirdparty/libwebp/src/mux/muxi.h | 2 +- thirdparty/libwebp/src/mux/muxinternal.c | 9 +- thirdparty/libwebp/src/webp/encode.h | 6 +- 47 files changed, 2611 insertions(+), 1669 deletions(-) create mode 100644 thirdparty/libwebp/sharpyuv/sharpyuv.c create mode 100644 thirdparty/libwebp/sharpyuv/sharpyuv.h create mode 100644 thirdparty/libwebp/sharpyuv/sharpyuv_csp.c create mode 100644 thirdparty/libwebp/sharpyuv/sharpyuv_csp.h create mode 100644 thirdparty/libwebp/sharpyuv/sharpyuv_dsp.c create mode 100644 thirdparty/libwebp/sharpyuv/sharpyuv_dsp.h create mode 100644 thirdparty/libwebp/sharpyuv/sharpyuv_gamma.c create mode 100644 thirdparty/libwebp/sharpyuv/sharpyuv_gamma.h create mode 100644 thirdparty/libwebp/sharpyuv/sharpyuv_neon.c create mode 100644 thirdparty/libwebp/sharpyuv/sharpyuv_sse2.c create mode 100644 thirdparty/libwebp/src/dsp/cpu.h diff --git a/modules/webp/SCsub b/modules/webp/SCsub index 80d62400c82..72ad1ea5e44 100644 --- a/modules/webp/SCsub +++ b/modules/webp/SCsub @@ -12,123 +12,129 @@ thirdparty_obj = [] if env["builtin_libwebp"]: thirdparty_dir = "#thirdparty/libwebp/" thirdparty_sources = [ - "dec/alpha_dec.c", - "dec/buffer_dec.c", - "dec/frame_dec.c", - "dec/idec_dec.c", - "dec/io_dec.c", - "dec/quant_dec.c", - "dec/tree_dec.c", - "dec/vp8_dec.c", - "dec/vp8l_dec.c", - "dec/webp_dec.c", - "demux/anim_decode.c", - "demux/demux.c", - "dsp/alpha_processing.c", - "dsp/alpha_processing_mips_dsp_r2.c", - "dsp/alpha_processing_neon.c", - "dsp/alpha_processing_sse2.c", - "dsp/alpha_processing_sse41.c", - "dsp/cost.c", - "dsp/cost_mips32.c", - "dsp/cost_mips_dsp_r2.c", - "dsp/cost_neon.c", - "dsp/cost_sse2.c", - "dsp/cpu.c", - "dsp/dec.c", - "dsp/dec_clip_tables.c", - "dsp/dec_mips32.c", - "dsp/dec_mips_dsp_r2.c", - "dsp/dec_msa.c", - "dsp/dec_neon.c", - "dsp/dec_sse2.c", - "dsp/dec_sse41.c", - "dsp/enc.c", - "dsp/enc_mips32.c", - "dsp/enc_mips_dsp_r2.c", - "dsp/enc_msa.c", - "dsp/enc_neon.c", - "dsp/enc_sse2.c", - "dsp/enc_sse41.c", - "dsp/filters.c", - "dsp/filters_mips_dsp_r2.c", - "dsp/filters_msa.c", - "dsp/filters_neon.c", - "dsp/filters_sse2.c", - "dsp/lossless.c", - "dsp/lossless_enc.c", - "dsp/lossless_enc_mips32.c", - "dsp/lossless_enc_mips_dsp_r2.c", - "dsp/lossless_enc_msa.c", - "dsp/lossless_enc_neon.c", - "dsp/lossless_enc_sse2.c", - "dsp/lossless_enc_sse41.c", - "dsp/lossless_mips_dsp_r2.c", - "dsp/lossless_msa.c", - "dsp/lossless_neon.c", - "dsp/lossless_sse2.c", - "dsp/lossless_sse41.c", - "dsp/rescaler.c", - "dsp/rescaler_mips32.c", - "dsp/rescaler_mips_dsp_r2.c", - "dsp/rescaler_msa.c", - "dsp/rescaler_neon.c", - "dsp/rescaler_sse2.c", - "dsp/ssim.c", - "dsp/ssim_sse2.c", - "dsp/upsampling.c", - "dsp/upsampling_mips_dsp_r2.c", - "dsp/upsampling_msa.c", - "dsp/upsampling_neon.c", - "dsp/upsampling_sse2.c", - "dsp/upsampling_sse41.c", - "dsp/yuv.c", - "dsp/yuv_mips32.c", - "dsp/yuv_mips_dsp_r2.c", - "dsp/yuv_neon.c", - "dsp/yuv_sse2.c", - "dsp/yuv_sse41.c", - "enc/alpha_enc.c", - "enc/analysis_enc.c", - "enc/backward_references_cost_enc.c", - "enc/backward_references_enc.c", - "enc/config_enc.c", - "enc/cost_enc.c", - "enc/filter_enc.c", - "enc/frame_enc.c", - "enc/histogram_enc.c", - "enc/iterator_enc.c", - "enc/near_lossless_enc.c", - "enc/picture_csp_enc.c", - "enc/picture_enc.c", - "enc/picture_psnr_enc.c", - "enc/picture_rescale_enc.c", - "enc/picture_tools_enc.c", - "enc/predictor_enc.c", - "enc/quant_enc.c", - "enc/syntax_enc.c", - "enc/token_enc.c", - "enc/tree_enc.c", - "enc/vp8l_enc.c", - "enc/webp_enc.c", - "mux/anim_encode.c", - "mux/muxedit.c", - "mux/muxinternal.c", - "mux/muxread.c", - "utils/bit_reader_utils.c", - "utils/bit_writer_utils.c", - "utils/color_cache_utils.c", - "utils/filters_utils.c", - "utils/huffman_encode_utils.c", - "utils/huffman_utils.c", - "utils/quant_levels_dec_utils.c", - "utils/quant_levels_utils.c", - "utils/random_utils.c", - "utils/rescaler_utils.c", - "utils/thread_utils.c", - "utils/utils.c", + "sharpyuv/sharpyuv.c", + "sharpyuv/sharpyuv_csp.c", + "sharpyuv/sharpyuv_dsp.c", + "sharpyuv/sharpyuv_gamma.c", + "sharpyuv/sharpyuv_neon.c", + "sharpyuv/sharpyuv_sse2.c", + "src/dec/alpha_dec.c", + "src/dec/buffer_dec.c", + "src/dec/frame_dec.c", + "src/dec/idec_dec.c", + "src/dec/io_dec.c", + "src/dec/quant_dec.c", + "src/dec/tree_dec.c", + "src/dec/vp8_dec.c", + "src/dec/vp8l_dec.c", + "src/dec/webp_dec.c", + "src/demux/anim_decode.c", + "src/demux/demux.c", + "src/dsp/alpha_processing.c", + "src/dsp/alpha_processing_mips_dsp_r2.c", + "src/dsp/alpha_processing_neon.c", + "src/dsp/alpha_processing_sse2.c", + "src/dsp/alpha_processing_sse41.c", + "src/dsp/cost.c", + "src/dsp/cost_mips32.c", + "src/dsp/cost_mips_dsp_r2.c", + "src/dsp/cost_neon.c", + "src/dsp/cost_sse2.c", + "src/dsp/cpu.c", + "src/dsp/dec.c", + "src/dsp/dec_clip_tables.c", + "src/dsp/dec_mips32.c", + "src/dsp/dec_mips_dsp_r2.c", + "src/dsp/dec_msa.c", + "src/dsp/dec_neon.c", + "src/dsp/dec_sse2.c", + "src/dsp/dec_sse41.c", + "src/dsp/enc.c", + "src/dsp/enc_mips32.c", + "src/dsp/enc_mips_dsp_r2.c", + "src/dsp/enc_msa.c", + "src/dsp/enc_neon.c", + "src/dsp/enc_sse2.c", + "src/dsp/enc_sse41.c", + "src/dsp/filters.c", + "src/dsp/filters_mips_dsp_r2.c", + "src/dsp/filters_msa.c", + "src/dsp/filters_neon.c", + "src/dsp/filters_sse2.c", + "src/dsp/lossless.c", + "src/dsp/lossless_enc.c", + "src/dsp/lossless_enc_mips32.c", + "src/dsp/lossless_enc_mips_dsp_r2.c", + "src/dsp/lossless_enc_msa.c", + "src/dsp/lossless_enc_neon.c", + "src/dsp/lossless_enc_sse2.c", + "src/dsp/lossless_enc_sse41.c", + "src/dsp/lossless_mips_dsp_r2.c", + "src/dsp/lossless_msa.c", + "src/dsp/lossless_neon.c", + "src/dsp/lossless_sse2.c", + "src/dsp/lossless_sse41.c", + "src/dsp/rescaler.c", + "src/dsp/rescaler_mips32.c", + "src/dsp/rescaler_mips_dsp_r2.c", + "src/dsp/rescaler_msa.c", + "src/dsp/rescaler_neon.c", + "src/dsp/rescaler_sse2.c", + "src/dsp/ssim.c", + "src/dsp/ssim_sse2.c", + "src/dsp/upsampling.c", + "src/dsp/upsampling_mips_dsp_r2.c", + "src/dsp/upsampling_msa.c", + "src/dsp/upsampling_neon.c", + "src/dsp/upsampling_sse2.c", + "src/dsp/upsampling_sse41.c", + "src/dsp/yuv.c", + "src/dsp/yuv_mips32.c", + "src/dsp/yuv_mips_dsp_r2.c", + "src/dsp/yuv_neon.c", + "src/dsp/yuv_sse2.c", + "src/dsp/yuv_sse41.c", + "src/enc/alpha_enc.c", + "src/enc/analysis_enc.c", + "src/enc/backward_references_cost_enc.c", + "src/enc/backward_references_enc.c", + "src/enc/config_enc.c", + "src/enc/cost_enc.c", + "src/enc/filter_enc.c", + "src/enc/frame_enc.c", + "src/enc/histogram_enc.c", + "src/enc/iterator_enc.c", + "src/enc/near_lossless_enc.c", + "src/enc/picture_csp_enc.c", + "src/enc/picture_enc.c", + "src/enc/picture_psnr_enc.c", + "src/enc/picture_rescale_enc.c", + "src/enc/picture_tools_enc.c", + "src/enc/predictor_enc.c", + "src/enc/quant_enc.c", + "src/enc/syntax_enc.c", + "src/enc/token_enc.c", + "src/enc/tree_enc.c", + "src/enc/vp8l_enc.c", + "src/enc/webp_enc.c", + "src/mux/anim_encode.c", + "src/mux/muxedit.c", + "src/mux/muxinternal.c", + "src/mux/muxread.c", + "src/utils/bit_reader_utils.c", + "src/utils/bit_writer_utils.c", + "src/utils/color_cache_utils.c", + "src/utils/filters_utils.c", + "src/utils/huffman_encode_utils.c", + "src/utils/huffman_utils.c", + "src/utils/quant_levels_dec_utils.c", + "src/utils/quant_levels_utils.c", + "src/utils/random_utils.c", + "src/utils/rescaler_utils.c", + "src/utils/thread_utils.c", + "src/utils/utils.c", ] - thirdparty_sources = [thirdparty_dir + "src/" + file for file in thirdparty_sources] + thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources] env_webp.Prepend(CPPPATH=[thirdparty_dir, thirdparty_dir + "src/"]) diff --git a/thirdparty/README.md b/thirdparty/README.md index ea86b8bb6dc..1a1e10f7e85 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -266,12 +266,12 @@ from the Android NDK r18. ## libwebp - Upstream: https://chromium.googlesource.com/webm/libwebp/ -- Version: 1.2.2 (b0a860891dcd4c0c2d7c6149e5cccb6eb881cc21, 2022) +- Version: 1.2.4 (0d1f12546bd803099a60c070517a552483f3790e, 2022) - License: BSD-3-Clause Files extracted from upstream source: -- `src/*` except from: `.am`, `.rc` and `.in` files +- `src/` and `sharpyuv/` except from: `.am`, `.rc` and `.in` files - `AUTHORS`, `COPYING`, `PATENTS` diff --git a/thirdparty/libwebp/AUTHORS b/thirdparty/libwebp/AUTHORS index 8307c2099d1..3efcbe25b65 100644 --- a/thirdparty/libwebp/AUTHORS +++ b/thirdparty/libwebp/AUTHORS @@ -1,12 +1,15 @@ Contributors: - Aidan O'Loan (aidanol at gmail dot com) - Alan Browning (browning at google dot com) +- Alexandru Ardelean (ardeleanalex at gmail dot com) +- Brian Ledger (brianpl at google dot com) - Charles Munger (clm at google dot com) - Cheng Yi (cyi at google dot com) - Christian Duvivier (cduvivier at google dot com) - Christopher Degawa (ccom at randomderp dot com) - Clement Courbet (courbet at google dot com) - Djordje Pesut (djordje dot pesut at imgtec dot com) +- Frank Barchard (fbarchard at google dot com) - Hui Su (huisu at google dot com) - Ilya Kurdyukov (jpegqs at gmail dot com) - Ingvar Stepanyan (rreverser at google dot com) @@ -22,6 +25,7 @@ Contributors: - Mans Rullgard (mans at mansr dot com) - Marcin Kowalczyk (qrczak at google dot com) - Martin Olsson (mnemo at minimum dot se) +- Maryla Ustarroz-Calonge (maryla at google dot com) - Mikołaj Zalewski (mikolajz at google dot com) - Mislav Bradac (mislavm at google dot com) - Nico Weber (thakis at chromium dot org) diff --git a/thirdparty/libwebp/sharpyuv/sharpyuv.c b/thirdparty/libwebp/sharpyuv/sharpyuv.c new file mode 100644 index 00000000000..8b3ab7216b4 --- /dev/null +++ b/thirdparty/libwebp/sharpyuv/sharpyuv.c @@ -0,0 +1,498 @@ +// Copyright 2022 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Sharp RGB to YUV conversion. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include "sharpyuv/sharpyuv.h" + +#include +#include +#include +#include +#include + +#include "src/webp/types.h" +#include "src/dsp/cpu.h" +#include "sharpyuv/sharpyuv_dsp.h" +#include "sharpyuv/sharpyuv_gamma.h" + +//------------------------------------------------------------------------------ +// Sharp RGB->YUV conversion + +static const int kNumIterations = 4; + +#define YUV_FIX 16 // fixed-point precision for RGB->YUV +static const int kYuvHalf = 1 << (YUV_FIX - 1); + +// Max bit depth so that intermediate calculations fit in 16 bits. +static const int kMaxBitDepth = 14; + +// Returns the precision shift to use based on the input rgb_bit_depth. +static int GetPrecisionShift(int rgb_bit_depth) { + // Try to add 2 bits of precision if it fits in kMaxBitDepth. Otherwise remove + // bits if needed. + return ((rgb_bit_depth + 2) <= kMaxBitDepth) ? 2 + : (kMaxBitDepth - rgb_bit_depth); +} + +typedef int16_t fixed_t; // signed type with extra precision for UV +typedef uint16_t fixed_y_t; // unsigned type with extra precision for W + +//------------------------------------------------------------------------------ + +static uint8_t clip_8b(fixed_t v) { + return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u; +} + +static uint16_t clip(fixed_t v, int max) { + return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v; +} + +static fixed_y_t clip_bit_depth(int y, int bit_depth) { + const int max = (1 << bit_depth) - 1; + return (!(y & ~max)) ? (fixed_y_t)y : (y < 0) ? 0 : max; +} + +//------------------------------------------------------------------------------ + +static int RGBToGray(int64_t r, int64_t g, int64_t b) { + const int64_t luma = 13933 * r + 46871 * g + 4732 * b + kYuvHalf; + return (int)(luma >> YUV_FIX); +} + +static uint32_t ScaleDown(uint16_t a, uint16_t b, uint16_t c, uint16_t d, + int rgb_bit_depth) { + const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth); + const uint32_t A = SharpYuvGammaToLinear(a, bit_depth); + const uint32_t B = SharpYuvGammaToLinear(b, bit_depth); + const uint32_t C = SharpYuvGammaToLinear(c, bit_depth); + const uint32_t D = SharpYuvGammaToLinear(d, bit_depth); + return SharpYuvLinearToGamma((A + B + C + D + 2) >> 2, bit_depth); +} + +static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w, + int rgb_bit_depth) { + const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth); + int i; + for (i = 0; i < w; ++i) { + const uint32_t R = SharpYuvGammaToLinear(src[0 * w + i], bit_depth); + const uint32_t G = SharpYuvGammaToLinear(src[1 * w + i], bit_depth); + const uint32_t B = SharpYuvGammaToLinear(src[2 * w + i], bit_depth); + const uint32_t Y = RGBToGray(R, G, B); + dst[i] = (fixed_y_t)SharpYuvLinearToGamma(Y, bit_depth); + } +} + +static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2, + fixed_t* dst, int uv_w, int rgb_bit_depth) { + int i; + for (i = 0; i < uv_w; ++i) { + const int r = + ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], src2[0 * uv_w + 0], + src2[0 * uv_w + 1], rgb_bit_depth); + const int g = + ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], src2[2 * uv_w + 0], + src2[2 * uv_w + 1], rgb_bit_depth); + const int b = + ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], src2[4 * uv_w + 0], + src2[4 * uv_w + 1], rgb_bit_depth); + const int W = RGBToGray(r, g, b); + dst[0 * uv_w] = (fixed_t)(r - W); + dst[1 * uv_w] = (fixed_t)(g - W); + dst[2 * uv_w] = (fixed_t)(b - W); + dst += 1; + src1 += 2; + src2 += 2; + } +} + +static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) { + int i; + assert(w > 0); + for (i = 0; i < w; ++i) { + y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]); + } +} + +//------------------------------------------------------------------------------ + +static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0, int bit_depth) { + const int v0 = (A * 3 + B + 2) >> 2; + return clip_bit_depth(v0 + W0, bit_depth); +} + +//------------------------------------------------------------------------------ + +static WEBP_INLINE int Shift(int v, int shift) { + return (shift >= 0) ? (v << shift) : (v >> -shift); +} + +static void ImportOneRow(const uint8_t* const r_ptr, + const uint8_t* const g_ptr, + const uint8_t* const b_ptr, + int rgb_step, + int rgb_bit_depth, + int pic_width, + fixed_y_t* const dst) { + // Convert the rgb_step from a number of bytes to a number of uint8_t or + // uint16_t values depending the bit depth. + const int step = (rgb_bit_depth > 8) ? rgb_step / 2 : rgb_step; + int i; + const int w = (pic_width + 1) & ~1; + for (i = 0; i < pic_width; ++i) { + const int off = i * step; + const int shift = GetPrecisionShift(rgb_bit_depth); + if (rgb_bit_depth == 8) { + dst[i + 0 * w] = Shift(r_ptr[off], shift); + dst[i + 1 * w] = Shift(g_ptr[off], shift); + dst[i + 2 * w] = Shift(b_ptr[off], shift); + } else { + dst[i + 0 * w] = Shift(((uint16_t*)r_ptr)[off], shift); + dst[i + 1 * w] = Shift(((uint16_t*)g_ptr)[off], shift); + dst[i + 2 * w] = Shift(((uint16_t*)b_ptr)[off], shift); + } + } + if (pic_width & 1) { // replicate rightmost pixel + dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1]; + dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1]; + dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1]; + } +} + +static void InterpolateTwoRows(const fixed_y_t* const best_y, + const fixed_t* prev_uv, + const fixed_t* cur_uv, + const fixed_t* next_uv, + int w, + fixed_y_t* out1, + fixed_y_t* out2, + int rgb_bit_depth) { + const int uv_w = w >> 1; + const int len = (w - 1) >> 1; // length to filter + int k = 3; + const int bit_depth = rgb_bit_depth + GetPrecisionShift(rgb_bit_depth); + while (k-- > 0) { // process each R/G/B segments in turn + // special boundary case for i==0 + out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0], bit_depth); + out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w], bit_depth); + + SharpYuvFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1, + bit_depth); + SharpYuvFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1, + bit_depth); + + // special boundary case for i == w - 1 when w is even + if (!(w & 1)) { + out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1], + best_y[w - 1 + 0], bit_depth); + out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1], + best_y[w - 1 + w], bit_depth); + } + out1 += w; + out2 += w; + prev_uv += uv_w; + cur_uv += uv_w; + next_uv += uv_w; + } +} + +static WEBP_INLINE int RGBToYUVComponent(int r, int g, int b, + const int coeffs[4], int sfix) { + const int srounder = 1 << (YUV_FIX + sfix - 1); + const int luma = coeffs[0] * r + coeffs[1] * g + coeffs[2] * b + + coeffs[3] + srounder; + return (luma >> (YUV_FIX + sfix)); +} + +static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv, + uint8_t* y_ptr, int y_stride, uint8_t* u_ptr, + int u_stride, uint8_t* v_ptr, int v_stride, + int rgb_bit_depth, + int yuv_bit_depth, int width, int height, + const SharpYuvConversionMatrix* yuv_matrix) { + int i, j; + const fixed_t* const best_uv_base = best_uv; + const int w = (width + 1) & ~1; + const int h = (height + 1) & ~1; + const int uv_w = w >> 1; + const int uv_h = h >> 1; + const int sfix = GetPrecisionShift(rgb_bit_depth); + const int yuv_max = (1 << yuv_bit_depth) - 1; + + for (best_uv = best_uv_base, j = 0; j < height; ++j) { + for (i = 0; i < width; ++i) { + const int off = (i >> 1); + const int W = best_y[i]; + const int r = best_uv[off + 0 * uv_w] + W; + const int g = best_uv[off + 1 * uv_w] + W; + const int b = best_uv[off + 2 * uv_w] + W; + const int y = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_y, sfix); + if (yuv_bit_depth <= 8) { + y_ptr[i] = clip_8b(y); + } else { + ((uint16_t*)y_ptr)[i] = clip(y, yuv_max); + } + } + best_y += w; + best_uv += (j & 1) * 3 * uv_w; + y_ptr += y_stride; + } + for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) { + for (i = 0; i < uv_w; ++i) { + const int off = i; + // Note r, g and b values here are off by W, but a constant offset on all + // 3 components doesn't change the value of u and v with a YCbCr matrix. + const int r = best_uv[off + 0 * uv_w]; + const int g = best_uv[off + 1 * uv_w]; + const int b = best_uv[off + 2 * uv_w]; + const int u = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_u, sfix); + const int v = RGBToYUVComponent(r, g, b, yuv_matrix->rgb_to_v, sfix); + if (yuv_bit_depth <= 8) { + u_ptr[i] = clip_8b(u); + v_ptr[i] = clip_8b(v); + } else { + ((uint16_t*)u_ptr)[i] = clip(u, yuv_max); + ((uint16_t*)v_ptr)[i] = clip(v, yuv_max); + } + } + best_uv += 3 * uv_w; + u_ptr += u_stride; + v_ptr += v_stride; + } + return 1; +} + +//------------------------------------------------------------------------------ +// Main function + +static void* SafeMalloc(uint64_t nmemb, size_t size) { + const uint64_t total_size = nmemb * (uint64_t)size; + if (total_size != (size_t)total_size) return NULL; + return malloc((size_t)total_size); +} + +#define SAFE_ALLOC(W, H, T) ((T*)SafeMalloc((W) * (H), sizeof(T))) + +static int DoSharpArgbToYuv(const uint8_t* r_ptr, const uint8_t* g_ptr, + const uint8_t* b_ptr, int rgb_step, int rgb_stride, + int rgb_bit_depth, uint8_t* y_ptr, int y_stride, + uint8_t* u_ptr, int u_stride, uint8_t* v_ptr, + int v_stride, int yuv_bit_depth, int width, + int height, + const SharpYuvConversionMatrix* yuv_matrix) { + // we expand the right/bottom border if needed + const int w = (width + 1) & ~1; + const int h = (height + 1) & ~1; + const int uv_w = w >> 1; + const int uv_h = h >> 1; + uint64_t prev_diff_y_sum = ~0; + int j, iter; + + // TODO(skal): allocate one big memory chunk. But for now, it's easier + // for valgrind debugging to have several chunks. + fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t); // scratch + fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t); + fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t); + fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t); + fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); + fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); + fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t); + fixed_y_t* best_y = best_y_base; + fixed_y_t* target_y = target_y_base; + fixed_t* best_uv = best_uv_base; + fixed_t* target_uv = target_uv_base; + const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h); + int ok; + assert(w > 0); + assert(h > 0); + + if (best_y_base == NULL || best_uv_base == NULL || + target_y_base == NULL || target_uv_base == NULL || + best_rgb_y == NULL || best_rgb_uv == NULL || + tmp_buffer == NULL) { + ok = 0; + goto End; + } + + // Import RGB samples to W/RGB representation. + for (j = 0; j < height; j += 2) { + const int is_last_row = (j == height - 1); + fixed_y_t* const src1 = tmp_buffer + 0 * w; + fixed_y_t* const src2 = tmp_buffer + 3 * w; + + // prepare two rows of input + ImportOneRow(r_ptr, g_ptr, b_ptr, rgb_step, rgb_bit_depth, width, + src1); + if (!is_last_row) { + ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride, + rgb_step, rgb_bit_depth, width, src2); + } else { + memcpy(src2, src1, 3 * w * sizeof(*src2)); + } + StoreGray(src1, best_y + 0, w); + StoreGray(src2, best_y + w, w); + + UpdateW(src1, target_y, w, rgb_bit_depth); + UpdateW(src2, target_y + w, w, rgb_bit_depth); + UpdateChroma(src1, src2, target_uv, uv_w, rgb_bit_depth); + memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv)); + best_y += 2 * w; + best_uv += 3 * uv_w; + target_y += 2 * w; + target_uv += 3 * uv_w; + r_ptr += 2 * rgb_stride; + g_ptr += 2 * rgb_stride; + b_ptr += 2 * rgb_stride; + } + + // Iterate and resolve clipping conflicts. + for (iter = 0; iter < kNumIterations; ++iter) { + const fixed_t* cur_uv = best_uv_base; + const fixed_t* prev_uv = best_uv_base; + uint64_t diff_y_sum = 0; + + best_y = best_y_base; + best_uv = best_uv_base; + target_y = target_y_base; + target_uv = target_uv_base; + for (j = 0; j < h; j += 2) { + fixed_y_t* const src1 = tmp_buffer + 0 * w; + fixed_y_t* const src2 = tmp_buffer + 3 * w; + { + const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0); + InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, + src1, src2, rgb_bit_depth); + prev_uv = cur_uv; + cur_uv = next_uv; + } + + UpdateW(src1, best_rgb_y + 0 * w, w, rgb_bit_depth); + UpdateW(src2, best_rgb_y + 1 * w, w, rgb_bit_depth); + UpdateChroma(src1, src2, best_rgb_uv, uv_w, rgb_bit_depth); + + // update two rows of Y and one row of RGB + diff_y_sum += + SharpYuvUpdateY(target_y, best_rgb_y, best_y, 2 * w, + rgb_bit_depth + GetPrecisionShift(rgb_bit_depth)); + SharpYuvUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w); + + best_y += 2 * w; + best_uv += 3 * uv_w; + target_y += 2 * w; + target_uv += 3 * uv_w; + } + // test exit condition + if (iter > 0) { + if (diff_y_sum < diff_y_threshold) break; + if (diff_y_sum > prev_diff_y_sum) break; + } + prev_diff_y_sum = diff_y_sum; + } + + // final reconstruction + ok = ConvertWRGBToYUV(best_y_base, best_uv_base, y_ptr, y_stride, u_ptr, + u_stride, v_ptr, v_stride, rgb_bit_depth, yuv_bit_depth, + width, height, yuv_matrix); + + End: + free(best_y_base); + free(best_uv_base); + free(target_y_base); + free(target_uv_base); + free(best_rgb_y); + free(best_rgb_uv); + free(tmp_buffer); + return ok; +} +#undef SAFE_ALLOC + +// Hidden exported init function. +// By default SharpYuvConvert calls it with NULL. If needed, users can declare +// it as extern and call it with a VP8CPUInfo function. +extern void SharpYuvInit(VP8CPUInfo cpu_info_func); +void SharpYuvInit(VP8CPUInfo cpu_info_func) { + static volatile VP8CPUInfo sharpyuv_last_cpuinfo_used = + (VP8CPUInfo)&sharpyuv_last_cpuinfo_used; + const int initialized = + (sharpyuv_last_cpuinfo_used != (VP8CPUInfo)&sharpyuv_last_cpuinfo_used); + if (cpu_info_func == NULL && initialized) return; + if (sharpyuv_last_cpuinfo_used == cpu_info_func) return; + + SharpYuvInitDsp(cpu_info_func); + if (!initialized) { + SharpYuvInitGammaTables(); + } + + sharpyuv_last_cpuinfo_used = cpu_info_func; +} + +int SharpYuvConvert(const void* r_ptr, const void* g_ptr, + const void* b_ptr, int rgb_step, int rgb_stride, + int rgb_bit_depth, void* y_ptr, int y_stride, + void* u_ptr, int u_stride, void* v_ptr, + int v_stride, int yuv_bit_depth, int width, + int height, const SharpYuvConversionMatrix* yuv_matrix) { + SharpYuvConversionMatrix scaled_matrix; + const int rgb_max = (1 << rgb_bit_depth) - 1; + const int rgb_round = 1 << (rgb_bit_depth - 1); + const int yuv_max = (1 << yuv_bit_depth) - 1; + const int sfix = GetPrecisionShift(rgb_bit_depth); + + if (width < 1 || height < 1 || width == INT_MAX || height == INT_MAX || + r_ptr == NULL || g_ptr == NULL || b_ptr == NULL || y_ptr == NULL || + u_ptr == NULL || v_ptr == NULL) { + return 0; + } + if (rgb_bit_depth != 8 && rgb_bit_depth != 10 && rgb_bit_depth != 12 && + rgb_bit_depth != 16) { + return 0; + } + if (yuv_bit_depth != 8 && yuv_bit_depth != 10 && yuv_bit_depth != 12) { + return 0; + } + if (rgb_bit_depth > 8 && (rgb_step % 2 != 0 || rgb_stride %2 != 0)) { + // Step/stride should be even for uint16_t buffers. + return 0; + } + if (yuv_bit_depth > 8 && + (y_stride % 2 != 0 || u_stride % 2 != 0 || v_stride % 2 != 0)) { + // Stride should be even for uint16_t buffers. + return 0; + } + SharpYuvInit(NULL); + + // Add scaling factor to go from rgb_bit_depth to yuv_bit_depth, to the + // rgb->yuv conversion matrix. + if (rgb_bit_depth == yuv_bit_depth) { + memcpy(&scaled_matrix, yuv_matrix, sizeof(scaled_matrix)); + } else { + int i; + for (i = 0; i < 3; ++i) { + scaled_matrix.rgb_to_y[i] = + (yuv_matrix->rgb_to_y[i] * yuv_max + rgb_round) / rgb_max; + scaled_matrix.rgb_to_u[i] = + (yuv_matrix->rgb_to_u[i] * yuv_max + rgb_round) / rgb_max; + scaled_matrix.rgb_to_v[i] = + (yuv_matrix->rgb_to_v[i] * yuv_max + rgb_round) / rgb_max; + } + } + // Also incorporate precision change scaling. + scaled_matrix.rgb_to_y[3] = Shift(yuv_matrix->rgb_to_y[3], sfix); + scaled_matrix.rgb_to_u[3] = Shift(yuv_matrix->rgb_to_u[3], sfix); + scaled_matrix.rgb_to_v[3] = Shift(yuv_matrix->rgb_to_v[3], sfix); + + return DoSharpArgbToYuv(r_ptr, g_ptr, b_ptr, rgb_step, rgb_stride, + rgb_bit_depth, y_ptr, y_stride, u_ptr, u_stride, + v_ptr, v_stride, yuv_bit_depth, width, height, + &scaled_matrix); +} + +//------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/sharpyuv/sharpyuv.h b/thirdparty/libwebp/sharpyuv/sharpyuv.h new file mode 100644 index 00000000000..9386ea21850 --- /dev/null +++ b/thirdparty/libwebp/sharpyuv/sharpyuv.h @@ -0,0 +1,81 @@ +// Copyright 2022 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Sharp RGB to YUV conversion. + +#ifndef WEBP_SHARPYUV_SHARPYUV_H_ +#define WEBP_SHARPYUV_SHARPYUV_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// SharpYUV API version following the convention from semver.org +#define SHARPYUV_VERSION_MAJOR 0 +#define SHARPYUV_VERSION_MINOR 1 +#define SHARPYUV_VERSION_PATCH 0 +// Version as a uint32_t. The major number is the high 8 bits. +// The minor number is the middle 8 bits. The patch number is the low 16 bits. +#define SHARPYUV_MAKE_VERSION(MAJOR, MINOR, PATCH) \ + (((MAJOR) << 24) | ((MINOR) << 16) | (PATCH)) +#define SHARPYUV_VERSION \ + SHARPYUV_MAKE_VERSION(SHARPYUV_VERSION_MAJOR, SHARPYUV_VERSION_MINOR, \ + SHARPYUV_VERSION_PATCH) + +// RGB to YUV conversion matrix, in 16 bit fixed point. +// y = rgb_to_y[0] * r + rgb_to_y[1] * g + rgb_to_y[2] * b + rgb_to_y[3] +// u = rgb_to_u[0] * r + rgb_to_u[1] * g + rgb_to_u[2] * b + rgb_to_u[3] +// v = rgb_to_v[0] * r + rgb_to_v[1] * g + rgb_to_v[2] * b + rgb_to_v[3] +// Then y, u and v values are divided by 1<<16 and rounded. +typedef struct { + int rgb_to_y[4]; + int rgb_to_u[4]; + int rgb_to_v[4]; +} SharpYuvConversionMatrix; + +// Converts RGB to YUV420 using a downsampling algorithm that minimizes +// artefacts caused by chroma subsampling. +// This is slower than standard downsampling (averaging of 4 UV values). +// Assumes that the image will be upsampled using a bilinear filter. If nearest +// neighbor is used instead, the upsampled image might look worse than with +// standard downsampling. +// r_ptr, g_ptr, b_ptr: pointers to the source r, g and b channels. Should point +// to uint8_t buffers if rgb_bit_depth is 8, or uint16_t buffers otherwise. +// rgb_step: distance in bytes between two horizontally adjacent pixels on the +// r, g and b channels. If rgb_bit_depth is > 8, it should be a +// multiple of 2. +// rgb_stride: distance in bytes between two vertically adjacent pixels on the +// r, g, and b channels. If rgb_bit_depth is > 8, it should be a +// multiple of 2. +// rgb_bit_depth: number of bits for each r/g/b value. One of: 8, 10, 12, 16. +// Note: 16 bit input is truncated to 14 bits before conversion to yuv. +// yuv_bit_depth: number of bits for each y/u/v value. One of: 8, 10, 12. +// y_ptr, u_ptr, v_ptr: pointers to the destination y, u and v channels. Should +// point to uint8_t buffers if yuv_bit_depth is 8, or uint16_t buffers +// otherwise. +// y_stride, u_stride, v_stride: distance in bytes between two vertically +// adjacent pixels on the y, u and v channels. If yuv_bit_depth > 8, they +// should be multiples of 2. +// width, height: width and height of the image in pixels +int SharpYuvConvert(const void* r_ptr, const void* g_ptr, const void* b_ptr, + int rgb_step, int rgb_stride, int rgb_bit_depth, + void* y_ptr, int y_stride, void* u_ptr, int u_stride, + void* v_ptr, int v_stride, int yuv_bit_depth, int width, + int height, const SharpYuvConversionMatrix* yuv_matrix); + +// TODO(b/194336375): Add YUV444 to YUV420 conversion. Maybe also add 422 +// support (it's rarely used in practice, especially for images). + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // WEBP_SHARPYUV_SHARPYUV_H_ diff --git a/thirdparty/libwebp/sharpyuv/sharpyuv_csp.c b/thirdparty/libwebp/sharpyuv/sharpyuv_csp.c new file mode 100644 index 00000000000..5334fa64fa9 --- /dev/null +++ b/thirdparty/libwebp/sharpyuv/sharpyuv_csp.c @@ -0,0 +1,110 @@ +// Copyright 2022 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Colorspace utilities. + +#include "sharpyuv/sharpyuv_csp.h" + +#include +#include +#include + +static int ToFixed16(float f) { return (int)floor(f * (1 << 16) + 0.5f); } + +void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space, + SharpYuvConversionMatrix* matrix) { + const float kr = yuv_color_space->kr; + const float kb = yuv_color_space->kb; + const float kg = 1.0f - kr - kb; + const float cr = 0.5f / (1.0f - kb); + const float cb = 0.5f / (1.0f - kr); + + const int shift = yuv_color_space->bit_depth - 8; + + const float denom = (float)((1 << yuv_color_space->bit_depth) - 1); + float scale_y = 1.0f; + float add_y = 0.0f; + float scale_u = cr; + float scale_v = cb; + float add_uv = (float)(128 << shift); + assert(yuv_color_space->bit_depth >= 8); + + if (yuv_color_space->range == kSharpYuvRangeLimited) { + scale_y *= (219 << shift) / denom; + scale_u *= (224 << shift) / denom; + scale_v *= (224 << shift) / denom; + add_y = (float)(16 << shift); + } + + matrix->rgb_to_y[0] = ToFixed16(kr * scale_y); + matrix->rgb_to_y[1] = ToFixed16(kg * scale_y); + matrix->rgb_to_y[2] = ToFixed16(kb * scale_y); + matrix->rgb_to_y[3] = ToFixed16(add_y); + + matrix->rgb_to_u[0] = ToFixed16(-kr * scale_u); + matrix->rgb_to_u[1] = ToFixed16(-kg * scale_u); + matrix->rgb_to_u[2] = ToFixed16((1 - kb) * scale_u); + matrix->rgb_to_u[3] = ToFixed16(add_uv); + + matrix->rgb_to_v[0] = ToFixed16((1 - kr) * scale_v); + matrix->rgb_to_v[1] = ToFixed16(-kg * scale_v); + matrix->rgb_to_v[2] = ToFixed16(-kb * scale_v); + matrix->rgb_to_v[3] = ToFixed16(add_uv); +} + +// Matrices are in YUV_FIX fixed point precision. +// WebP's matrix, similar but not identical to kRec601LimitedMatrix. +static const SharpYuvConversionMatrix kWebpMatrix = { + {16839, 33059, 6420, 16 << 16}, + {-9719, -19081, 28800, 128 << 16}, + {28800, -24116, -4684, 128 << 16}, +}; +// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeLimited +static const SharpYuvConversionMatrix kRec601LimitedMatrix = { + {16829, 33039, 6416, 16 << 16}, + {-9714, -19071, 28784, 128 << 16}, + {28784, -24103, -4681, 128 << 16}, +}; +// Kr=0.2990f Kb=0.1140f bits=8 range=kSharpYuvRangeFull +static const SharpYuvConversionMatrix kRec601FullMatrix = { + {19595, 38470, 7471, 0}, + {-11058, -21710, 32768, 128 << 16}, + {32768, -27439, -5329, 128 << 16}, +}; +// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeLimited +static const SharpYuvConversionMatrix kRec709LimitedMatrix = { + {11966, 40254, 4064, 16 << 16}, + {-6596, -22189, 28784, 128 << 16}, + {28784, -26145, -2639, 128 << 16}, +}; +// Kr=0.2126f Kb=0.0722f bits=8 range=kSharpYuvRangeFull +static const SharpYuvConversionMatrix kRec709FullMatrix = { + {13933, 46871, 4732, 0}, + {-7509, -25259, 32768, 128 << 16}, + {32768, -29763, -3005, 128 << 16}, +}; + +const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix( + SharpYuvMatrixType matrix_type) { + switch (matrix_type) { + case kSharpYuvMatrixWebp: + return &kWebpMatrix; + case kSharpYuvMatrixRec601Limited: + return &kRec601LimitedMatrix; + case kSharpYuvMatrixRec601Full: + return &kRec601FullMatrix; + case kSharpYuvMatrixRec709Limited: + return &kRec709LimitedMatrix; + case kSharpYuvMatrixRec709Full: + return &kRec709FullMatrix; + case kSharpYuvMatrixNum: + return NULL; + } + return NULL; +} diff --git a/thirdparty/libwebp/sharpyuv/sharpyuv_csp.h b/thirdparty/libwebp/sharpyuv/sharpyuv_csp.h new file mode 100644 index 00000000000..63c99ef5cd5 --- /dev/null +++ b/thirdparty/libwebp/sharpyuv/sharpyuv_csp.h @@ -0,0 +1,59 @@ +// Copyright 2022 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Colorspace utilities. + +#ifndef WEBP_SHARPYUV_SHARPYUV_CSP_H_ +#define WEBP_SHARPYUV_SHARPYUV_CSP_H_ + +#include "sharpyuv/sharpyuv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Range of YUV values. +typedef enum { + kSharpYuvRangeFull, // YUV values between [0;255] (for 8 bit) + kSharpYuvRangeLimited // Y in [16;235], YUV in [16;240] (for 8 bit) +} SharpYuvRange; + +// Constants that define a YUV color space. +typedef struct { + // Kr and Kb are defined such that: + // Y = Kr * r + Kg * g + Kb * b where Kg = 1 - Kr - Kb. + float kr; + float kb; + int bit_depth; // 8, 10 or 12 + SharpYuvRange range; +} SharpYuvColorSpace; + +// Fills in 'matrix' for the given YUVColorSpace. +void SharpYuvComputeConversionMatrix(const SharpYuvColorSpace* yuv_color_space, + SharpYuvConversionMatrix* matrix); + +// Enums for precomputed conversion matrices. +typedef enum { + kSharpYuvMatrixWebp = 0, + kSharpYuvMatrixRec601Limited, + kSharpYuvMatrixRec601Full, + kSharpYuvMatrixRec709Limited, + kSharpYuvMatrixRec709Full, + kSharpYuvMatrixNum +} SharpYuvMatrixType; + +// Returns a pointer to a matrix for one of the predefined colorspaces. +const SharpYuvConversionMatrix* SharpYuvGetConversionMatrix( + SharpYuvMatrixType matrix_type); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // WEBP_SHARPYUV_SHARPYUV_CSP_H_ diff --git a/thirdparty/libwebp/sharpyuv/sharpyuv_dsp.c b/thirdparty/libwebp/sharpyuv/sharpyuv_dsp.c new file mode 100644 index 00000000000..956fa7ce550 --- /dev/null +++ b/thirdparty/libwebp/sharpyuv/sharpyuv_dsp.c @@ -0,0 +1,102 @@ +// Copyright 2022 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Speed-critical functions for Sharp YUV. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include "sharpyuv/sharpyuv_dsp.h" + +#include +#include + +#include "src/dsp/cpu.h" + +//----------------------------------------------------------------------------- + +#if !WEBP_NEON_OMIT_C_CODE +static uint16_t clip(int v, int max) { + return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v; +} + +static uint64_t SharpYuvUpdateY_C(const uint16_t* ref, const uint16_t* src, + uint16_t* dst, int len, int bit_depth) { + uint64_t diff = 0; + int i; + const int max_y = (1 << bit_depth) - 1; + for (i = 0; i < len; ++i) { + const int diff_y = ref[i] - src[i]; + const int new_y = (int)dst[i] + diff_y; + dst[i] = clip(new_y, max_y); + diff += (uint64_t)abs(diff_y); + } + return diff; +} + +static void SharpYuvUpdateRGB_C(const int16_t* ref, const int16_t* src, + int16_t* dst, int len) { + int i; + for (i = 0; i < len; ++i) { + const int diff_uv = ref[i] - src[i]; + dst[i] += diff_uv; + } +} + +static void SharpYuvFilterRow_C(const int16_t* A, const int16_t* B, int len, + const uint16_t* best_y, uint16_t* out, + int bit_depth) { + int i; + const int max_y = (1 << bit_depth) - 1; + for (i = 0; i < len; ++i, ++A, ++B) { + const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4; + const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4; + out[2 * i + 0] = clip(best_y[2 * i + 0] + v0, max_y); + out[2 * i + 1] = clip(best_y[2 * i + 1] + v1, max_y); + } +} +#endif // !WEBP_NEON_OMIT_C_CODE + +//----------------------------------------------------------------------------- + +uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref, + uint16_t* dst, int len, int bit_depth); +void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref, int16_t* dst, + int len); +void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len, + const uint16_t* best_y, uint16_t* out, + int bit_depth); + +extern void InitSharpYuvSSE2(void); +extern void InitSharpYuvNEON(void); + +void SharpYuvInitDsp(VP8CPUInfo cpu_info_func) { + (void)cpu_info_func; + +#if !WEBP_NEON_OMIT_C_CODE + SharpYuvUpdateY = SharpYuvUpdateY_C; + SharpYuvUpdateRGB = SharpYuvUpdateRGB_C; + SharpYuvFilterRow = SharpYuvFilterRow_C; +#endif + +#if defined(WEBP_HAVE_SSE2) + if (cpu_info_func == NULL || cpu_info_func(kSSE2)) { + InitSharpYuvSSE2(); + } +#endif // WEBP_HAVE_SSE2 + +#if defined(WEBP_HAVE_NEON) + if (WEBP_NEON_OMIT_C_CODE || cpu_info_func == NULL || cpu_info_func(kNEON)) { + InitSharpYuvNEON(); + } +#endif // WEBP_HAVE_NEON + + assert(SharpYuvUpdateY != NULL); + assert(SharpYuvUpdateRGB != NULL); + assert(SharpYuvFilterRow != NULL); +} diff --git a/thirdparty/libwebp/sharpyuv/sharpyuv_dsp.h b/thirdparty/libwebp/sharpyuv/sharpyuv_dsp.h new file mode 100644 index 00000000000..e561d8d3d04 --- /dev/null +++ b/thirdparty/libwebp/sharpyuv/sharpyuv_dsp.h @@ -0,0 +1,29 @@ +// Copyright 2022 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Speed-critical functions for Sharp YUV. + +#ifndef WEBP_SHARPYUV_SHARPYUV_DSP_H_ +#define WEBP_SHARPYUV_SHARPYUV_DSP_H_ + +#include + +#include "src/dsp/cpu.h" + +extern uint64_t (*SharpYuvUpdateY)(const uint16_t* src, const uint16_t* ref, + uint16_t* dst, int len, int bit_depth); +extern void (*SharpYuvUpdateRGB)(const int16_t* src, const int16_t* ref, + int16_t* dst, int len); +extern void (*SharpYuvFilterRow)(const int16_t* A, const int16_t* B, int len, + const uint16_t* best_y, uint16_t* out, + int bit_depth); + +void SharpYuvInitDsp(VP8CPUInfo cpu_info_func); + +#endif // WEBP_SHARPYUV_SHARPYUV_DSP_H_ diff --git a/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.c b/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.c new file mode 100644 index 00000000000..05b5436f831 --- /dev/null +++ b/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.c @@ -0,0 +1,114 @@ +// Copyright 2022 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Gamma correction utilities. + +#include "sharpyuv/sharpyuv_gamma.h" + +#include +#include +#include + +#include "src/webp/types.h" + +// Gamma correction compensates loss of resolution during chroma subsampling. +// Size of pre-computed table for converting from gamma to linear. +#define GAMMA_TO_LINEAR_TAB_BITS 10 +#define GAMMA_TO_LINEAR_TAB_SIZE (1 << GAMMA_TO_LINEAR_TAB_BITS) +static uint32_t kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 2]; +#define LINEAR_TO_GAMMA_TAB_BITS 9 +#define LINEAR_TO_GAMMA_TAB_SIZE (1 << LINEAR_TO_GAMMA_TAB_BITS) +static uint32_t kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 2]; + +static const double kGammaF = 1. / 0.45; +#define GAMMA_TO_LINEAR_BITS 16 + +static volatile int kGammaTablesSOk = 0; +void SharpYuvInitGammaTables(void) { + assert(GAMMA_TO_LINEAR_BITS <= 16); + if (!kGammaTablesSOk) { + int v; + const double a = 0.09929682680944; + const double thresh = 0.018053968510807; + const double final_scale = 1 << GAMMA_TO_LINEAR_BITS; + // Precompute gamma to linear table. + { + const double norm = 1. / GAMMA_TO_LINEAR_TAB_SIZE; + const double a_rec = 1. / (1. + a); + for (v = 0; v <= GAMMA_TO_LINEAR_TAB_SIZE; ++v) { + const double g = norm * v; + double value; + if (g <= thresh * 4.5) { + value = g / 4.5; + } else { + value = pow(a_rec * (g + a), kGammaF); + } + kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5); + } + // to prevent small rounding errors to cause read-overflow: + kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE + 1] = + kGammaToLinearTabS[GAMMA_TO_LINEAR_TAB_SIZE]; + } + // Precompute linear to gamma table. + { + const double scale = 1. / LINEAR_TO_GAMMA_TAB_SIZE; + for (v = 0; v <= LINEAR_TO_GAMMA_TAB_SIZE; ++v) { + const double g = scale * v; + double value; + if (g <= thresh) { + value = 4.5 * g; + } else { + value = (1. + a) * pow(g, 1. / kGammaF) - a; + } + kLinearToGammaTabS[v] = + (uint32_t)(final_scale * value + 0.5); + } + // to prevent small rounding errors to cause read-overflow: + kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE + 1] = + kLinearToGammaTabS[LINEAR_TO_GAMMA_TAB_SIZE]; + } + kGammaTablesSOk = 1; + } +} + +static WEBP_INLINE int Shift(int v, int shift) { + return (shift >= 0) ? (v << shift) : (v >> -shift); +} + +static WEBP_INLINE uint32_t FixedPointInterpolation(int v, uint32_t* tab, + int tab_pos_shift_right, + int tab_value_shift) { + const uint32_t tab_pos = Shift(v, -tab_pos_shift_right); + // fractional part, in 'tab_pos_shift' fixed-point precision + const uint32_t x = v - (tab_pos << tab_pos_shift_right); // fractional part + // v0 / v1 are in kGammaToLinearBits fixed-point precision (range [0..1]) + const uint32_t v0 = Shift(tab[tab_pos + 0], tab_value_shift); + const uint32_t v1 = Shift(tab[tab_pos + 1], tab_value_shift); + // Final interpolation. + const uint32_t v2 = (v1 - v0) * x; // note: v1 >= v0. + const int half = + (tab_pos_shift_right > 0) ? 1 << (tab_pos_shift_right - 1) : 0; + const uint32_t result = v0 + ((v2 + half) >> tab_pos_shift_right); + return result; +} + +uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth) { + const int shift = GAMMA_TO_LINEAR_TAB_BITS - bit_depth; + if (shift > 0) { + return kGammaToLinearTabS[v << shift]; + } + return FixedPointInterpolation(v, kGammaToLinearTabS, -shift, 0); +} + +uint16_t SharpYuvLinearToGamma(uint32_t value, int bit_depth) { + return FixedPointInterpolation( + value, kLinearToGammaTabS, + (GAMMA_TO_LINEAR_BITS - LINEAR_TO_GAMMA_TAB_BITS), + bit_depth - GAMMA_TO_LINEAR_BITS); +} diff --git a/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.h b/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.h new file mode 100644 index 00000000000..2f1a3ff4a03 --- /dev/null +++ b/thirdparty/libwebp/sharpyuv/sharpyuv_gamma.h @@ -0,0 +1,35 @@ +// Copyright 2022 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Gamma correction utilities. + +#ifndef WEBP_SHARPYUV_SHARPYUV_GAMMA_H_ +#define WEBP_SHARPYUV_SHARPYUV_GAMMA_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +// Initializes precomputed tables. Must be called once before calling +// SharpYuvGammaToLinear or SharpYuvLinearToGamma. +void SharpYuvInitGammaTables(void); + +// Converts a gamma color value on 'bit_depth' bits to a 16 bit linear value. +uint32_t SharpYuvGammaToLinear(uint16_t v, int bit_depth); + +// Converts a 16 bit linear color value to a gamma value on 'bit_depth' bits. +uint16_t SharpYuvLinearToGamma(uint32_t value, int bit_depth); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // WEBP_SHARPYUV_SHARPYUV_GAMMA_H_ diff --git a/thirdparty/libwebp/sharpyuv/sharpyuv_neon.c b/thirdparty/libwebp/sharpyuv/sharpyuv_neon.c new file mode 100644 index 00000000000..5cf6aaffb07 --- /dev/null +++ b/thirdparty/libwebp/sharpyuv/sharpyuv_neon.c @@ -0,0 +1,182 @@ +// Copyright 2022 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Speed-critical functions for Sharp YUV. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include "sharpyuv/sharpyuv_dsp.h" + +#if defined(WEBP_USE_NEON) +#include +#include +#include +#endif + +extern void InitSharpYuvNEON(void); + +#if defined(WEBP_USE_NEON) + +static uint16_t clip_NEON(int v, int max) { + return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v; +} + +static uint64_t SharpYuvUpdateY_NEON(const uint16_t* ref, const uint16_t* src, + uint16_t* dst, int len, int bit_depth) { + const int max_y = (1 << bit_depth) - 1; + int i; + const int16x8_t zero = vdupq_n_s16(0); + const int16x8_t max = vdupq_n_s16(max_y); + uint64x2_t sum = vdupq_n_u64(0); + uint64_t diff; + + for (i = 0; i + 8 <= len; i += 8) { + const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i)); + const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i)); + const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i)); + const int16x8_t D = vsubq_s16(A, B); // diff_y + const int16x8_t F = vaddq_s16(C, D); // new_y + const uint16x8_t H = + vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero)); + const int16x8_t I = vabsq_s16(D); // abs(diff_y) + vst1q_u16(dst + i, H); + sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I))); + } + diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1); + for (; i < len; ++i) { + const int diff_y = ref[i] - src[i]; + const int new_y = (int)(dst[i]) + diff_y; + dst[i] = clip_NEON(new_y, max_y); + diff += (uint64_t)(abs(diff_y)); + } + return diff; +} + +static void SharpYuvUpdateRGB_NEON(const int16_t* ref, const int16_t* src, + int16_t* dst, int len) { + int i; + for (i = 0; i + 8 <= len; i += 8) { + const int16x8_t A = vld1q_s16(ref + i); + const int16x8_t B = vld1q_s16(src + i); + const int16x8_t C = vld1q_s16(dst + i); + const int16x8_t D = vsubq_s16(A, B); // diff_uv + const int16x8_t E = vaddq_s16(C, D); // new_uv + vst1q_s16(dst + i, E); + } + for (; i < len; ++i) { + const int diff_uv = ref[i] - src[i]; + dst[i] += diff_uv; + } +} + +static void SharpYuvFilterRow16_NEON(const int16_t* A, const int16_t* B, + int len, const uint16_t* best_y, + uint16_t* out, int bit_depth) { + const int max_y = (1 << bit_depth) - 1; + int i; + const int16x8_t max = vdupq_n_s16(max_y); + const int16x8_t zero = vdupq_n_s16(0); + for (i = 0; i + 8 <= len; i += 8) { + const int16x8_t a0 = vld1q_s16(A + i + 0); + const int16x8_t a1 = vld1q_s16(A + i + 1); + const int16x8_t b0 = vld1q_s16(B + i + 0); + const int16x8_t b1 = vld1q_s16(B + i + 1); + const int16x8_t a0b1 = vaddq_s16(a0, b1); + const int16x8_t a1b0 = vaddq_s16(a1, b0); + const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0); // A0+A1+B0+B1 + const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1); // 2*(A0+B1) + const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0); // 2*(A1+B0) + const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3); + const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3); + const int16x8_t e0 = vrhaddq_s16(c1, a0); + const int16x8_t e1 = vrhaddq_s16(c0, a1); + const int16x8x2_t f = vzipq_s16(e0, e1); + const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0)); + const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8)); + const int16x8_t h0 = vaddq_s16(g0, f.val[0]); + const int16x8_t h1 = vaddq_s16(g1, f.val[1]); + const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero); + const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero); + vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0)); + vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1)); + } + for (; i < len; ++i) { + const int a0b1 = A[i + 0] + B[i + 1]; + const int a1b0 = A[i + 1] + B[i + 0]; + const int a0a1b0b1 = a0b1 + a1b0 + 8; + const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; + const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; + out[2 * i + 0] = clip_NEON(best_y[2 * i + 0] + v0, max_y); + out[2 * i + 1] = clip_NEON(best_y[2 * i + 1] + v1, max_y); + } +} + +static void SharpYuvFilterRow32_NEON(const int16_t* A, const int16_t* B, + int len, const uint16_t* best_y, + uint16_t* out, int bit_depth) { + const int max_y = (1 << bit_depth) - 1; + int i; + const uint16x8_t max = vdupq_n_u16(max_y); + for (i = 0; i + 4 <= len; i += 4) { + const int16x4_t a0 = vld1_s16(A + i + 0); + const int16x4_t a1 = vld1_s16(A + i + 1); + const int16x4_t b0 = vld1_s16(B + i + 0); + const int16x4_t b1 = vld1_s16(B + i + 1); + const int32x4_t a0b1 = vaddl_s16(a0, b1); + const int32x4_t a1b0 = vaddl_s16(a1, b0); + const int32x4_t a0a1b0b1 = vaddq_s32(a0b1, a1b0); // A0+A1+B0+B1 + const int32x4_t a0b1_2 = vaddq_s32(a0b1, a0b1); // 2*(A0+B1) + const int32x4_t a1b0_2 = vaddq_s32(a1b0, a1b0); // 2*(A1+B0) + const int32x4_t c0 = vshrq_n_s32(vaddq_s32(a0b1_2, a0a1b0b1), 3); + const int32x4_t c1 = vshrq_n_s32(vaddq_s32(a1b0_2, a0a1b0b1), 3); + const int32x4_t e0 = vrhaddq_s32(c1, vmovl_s16(a0)); + const int32x4_t e1 = vrhaddq_s32(c0, vmovl_s16(a1)); + const int32x4x2_t f = vzipq_s32(e0, e1); + + const int16x8_t g = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i)); + const int32x4_t h0 = vaddw_s16(f.val[0], vget_low_s16(g)); + const int32x4_t h1 = vaddw_s16(f.val[1], vget_high_s16(g)); + const uint16x8_t i_16 = vcombine_u16(vqmovun_s32(h0), vqmovun_s32(h1)); + const uint16x8_t i_clamped = vminq_u16(i_16, max); + vst1q_u16(out + 2 * i + 0, i_clamped); + } + for (; i < len; ++i) { + const int a0b1 = A[i + 0] + B[i + 1]; + const int a1b0 = A[i + 1] + B[i + 0]; + const int a0a1b0b1 = a0b1 + a1b0 + 8; + const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; + const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; + out[2 * i + 0] = clip_NEON(best_y[2 * i + 0] + v0, max_y); + out[2 * i + 1] = clip_NEON(best_y[2 * i + 1] + v1, max_y); + } +} + +static void SharpYuvFilterRow_NEON(const int16_t* A, const int16_t* B, int len, + const uint16_t* best_y, uint16_t* out, + int bit_depth) { + if (bit_depth <= 10) { + SharpYuvFilterRow16_NEON(A, B, len, best_y, out, bit_depth); + } else { + SharpYuvFilterRow32_NEON(A, B, len, best_y, out, bit_depth); + } +} + +//------------------------------------------------------------------------------ + +WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvNEON(void) { + SharpYuvUpdateY = SharpYuvUpdateY_NEON; + SharpYuvUpdateRGB = SharpYuvUpdateRGB_NEON; + SharpYuvFilterRow = SharpYuvFilterRow_NEON; +} + +#else // !WEBP_USE_NEON + +void InitSharpYuvNEON(void) {} + +#endif // WEBP_USE_NEON diff --git a/thirdparty/libwebp/sharpyuv/sharpyuv_sse2.c b/thirdparty/libwebp/sharpyuv/sharpyuv_sse2.c new file mode 100644 index 00000000000..19438737480 --- /dev/null +++ b/thirdparty/libwebp/sharpyuv/sharpyuv_sse2.c @@ -0,0 +1,204 @@ +// Copyright 2022 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// Speed-critical functions for Sharp YUV. +// +// Author: Skal (pascal.massimino@gmail.com) + +#include "sharpyuv/sharpyuv_dsp.h" + +#if defined(WEBP_USE_SSE2) +#include +#include +#endif + +extern void InitSharpYuvSSE2(void); + +#if defined(WEBP_USE_SSE2) + +static uint16_t clip_SSE2(int v, int max) { + return (v < 0) ? 0 : (v > max) ? max : (uint16_t)v; +} + +static uint64_t SharpYuvUpdateY_SSE2(const uint16_t* ref, const uint16_t* src, + uint16_t* dst, int len, int bit_depth) { + const int max_y = (1 << bit_depth) - 1; + uint64_t diff = 0; + uint32_t tmp[4]; + int i; + const __m128i zero = _mm_setzero_si128(); + const __m128i max = _mm_set1_epi16(max_y); + const __m128i one = _mm_set1_epi16(1); + __m128i sum = zero; + + for (i = 0; i + 8 <= len; i += 8) { + const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i)); + const __m128i B = _mm_loadu_si128((const __m128i*)(src + i)); + const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i)); + const __m128i D = _mm_sub_epi16(A, B); // diff_y + const __m128i E = _mm_cmpgt_epi16(zero, D); // sign (-1 or 0) + const __m128i F = _mm_add_epi16(C, D); // new_y + const __m128i G = _mm_or_si128(E, one); // -1 or 1 + const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero); + const __m128i I = _mm_madd_epi16(D, G); // sum(abs(...)) + _mm_storeu_si128((__m128i*)(dst + i), H); + sum = _mm_add_epi32(sum, I); + } + _mm_storeu_si128((__m128i*)tmp, sum); + diff = tmp[3] + tmp[2] + tmp[1] + tmp[0]; + for (; i < len; ++i) { + const int diff_y = ref[i] - src[i]; + const int new_y = (int)dst[i] + diff_y; + dst[i] = clip_SSE2(new_y, max_y); + diff += (uint64_t)abs(diff_y); + } + return diff; +} + +static void SharpYuvUpdateRGB_SSE2(const int16_t* ref, const int16_t* src, + int16_t* dst, int len) { + int i = 0; + for (i = 0; i + 8 <= len; i += 8) { + const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i)); + const __m128i B = _mm_loadu_si128((const __m128i*)(src + i)); + const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i)); + const __m128i D = _mm_sub_epi16(A, B); // diff_uv + const __m128i E = _mm_add_epi16(C, D); // new_uv + _mm_storeu_si128((__m128i*)(dst + i), E); + } + for (; i < len; ++i) { + const int diff_uv = ref[i] - src[i]; + dst[i] += diff_uv; + } +} + +static void SharpYuvFilterRow16_SSE2(const int16_t* A, const int16_t* B, + int len, const uint16_t* best_y, + uint16_t* out, int bit_depth) { + const int max_y = (1 << bit_depth) - 1; + int i; + const __m128i kCst8 = _mm_set1_epi16(8); + const __m128i max = _mm_set1_epi16(max_y); + const __m128i zero = _mm_setzero_si128(); + for (i = 0; i + 8 <= len; i += 8) { + const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0)); + const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1)); + const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0)); + const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1)); + const __m128i a0b1 = _mm_add_epi16(a0, b1); + const __m128i a1b0 = _mm_add_epi16(a1, b0); + const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0); // A0+A1+B0+B1 + const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8); + const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1); // 2*(A0+B1) + const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0); // 2*(A1+B0) + const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3); + const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3); + const __m128i d0 = _mm_add_epi16(c1, a0); + const __m128i d1 = _mm_add_epi16(c0, a1); + const __m128i e0 = _mm_srai_epi16(d0, 1); + const __m128i e1 = _mm_srai_epi16(d1, 1); + const __m128i f0 = _mm_unpacklo_epi16(e0, e1); + const __m128i f1 = _mm_unpackhi_epi16(e0, e1); + const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0)); + const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8)); + const __m128i h0 = _mm_add_epi16(g0, f0); + const __m128i h1 = _mm_add_epi16(g1, f1); + const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero); + const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero); + _mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0); + _mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1); + } + for (; i < len; ++i) { + // (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 = + // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4 + // We reuse the common sub-expressions. + const int a0b1 = A[i + 0] + B[i + 1]; + const int a1b0 = A[i + 1] + B[i + 0]; + const int a0a1b0b1 = a0b1 + a1b0 + 8; + const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; + const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; + out[2 * i + 0] = clip_SSE2(best_y[2 * i + 0] + v0, max_y); + out[2 * i + 1] = clip_SSE2(best_y[2 * i + 1] + v1, max_y); + } +} + +static WEBP_INLINE __m128i s16_to_s32(__m128i in) { + return _mm_srai_epi32(_mm_unpacklo_epi16(in, in), 16); +} + +static void SharpYuvFilterRow32_SSE2(const int16_t* A, const int16_t* B, + int len, const uint16_t* best_y, + uint16_t* out, int bit_depth) { + const int max_y = (1 << bit_depth) - 1; + int i; + const __m128i kCst8 = _mm_set1_epi32(8); + const __m128i max = _mm_set1_epi16(max_y); + const __m128i zero = _mm_setzero_si128(); + for (i = 0; i + 4 <= len; i += 4) { + const __m128i a0 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(A + i + 0))); + const __m128i a1 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(A + i + 1))); + const __m128i b0 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(B + i + 0))); + const __m128i b1 = s16_to_s32(_mm_loadl_epi64((const __m128i*)(B + i + 1))); + const __m128i a0b1 = _mm_add_epi32(a0, b1); + const __m128i a1b0 = _mm_add_epi32(a1, b0); + const __m128i a0a1b0b1 = _mm_add_epi32(a0b1, a1b0); // A0+A1+B0+B1 + const __m128i a0a1b0b1_8 = _mm_add_epi32(a0a1b0b1, kCst8); + const __m128i a0b1_2 = _mm_add_epi32(a0b1, a0b1); // 2*(A0+B1) + const __m128i a1b0_2 = _mm_add_epi32(a1b0, a1b0); // 2*(A1+B0) + const __m128i c0 = _mm_srai_epi32(_mm_add_epi32(a0b1_2, a0a1b0b1_8), 3); + const __m128i c1 = _mm_srai_epi32(_mm_add_epi32(a1b0_2, a0a1b0b1_8), 3); + const __m128i d0 = _mm_add_epi32(c1, a0); + const __m128i d1 = _mm_add_epi32(c0, a1); + const __m128i e0 = _mm_srai_epi32(d0, 1); + const __m128i e1 = _mm_srai_epi32(d1, 1); + const __m128i f0 = _mm_unpacklo_epi32(e0, e1); + const __m128i f1 = _mm_unpackhi_epi32(e0, e1); + const __m128i g = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0)); + const __m128i h_16 = _mm_add_epi16(g, _mm_packs_epi32(f0, f1)); + const __m128i final = _mm_max_epi16(_mm_min_epi16(h_16, max), zero); + _mm_storeu_si128((__m128i*)(out + 2 * i + 0), final); + } + for (; i < len; ++i) { + // (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 = + // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4 + // We reuse the common sub-expressions. + const int a0b1 = A[i + 0] + B[i + 1]; + const int a1b0 = A[i + 1] + B[i + 0]; + const int a0a1b0b1 = a0b1 + a1b0 + 8; + const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; + const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; + out[2 * i + 0] = clip_SSE2(best_y[2 * i + 0] + v0, max_y); + out[2 * i + 1] = clip_SSE2(best_y[2 * i + 1] + v1, max_y); + } +} + +static void SharpYuvFilterRow_SSE2(const int16_t* A, const int16_t* B, int len, + const uint16_t* best_y, uint16_t* out, + int bit_depth) { + if (bit_depth <= 10) { + SharpYuvFilterRow16_SSE2(A, B, len, best_y, out, bit_depth); + } else { + SharpYuvFilterRow32_SSE2(A, B, len, best_y, out, bit_depth); + } +} + +//------------------------------------------------------------------------------ + +extern void InitSharpYuvSSE2(void); + +WEBP_TSAN_IGNORE_FUNCTION void InitSharpYuvSSE2(void) { + SharpYuvUpdateY = SharpYuvUpdateY_SSE2; + SharpYuvUpdateRGB = SharpYuvUpdateRGB_SSE2; + SharpYuvFilterRow = SharpYuvFilterRow_SSE2; +} +#else // !WEBP_USE_SSE2 + +void InitSharpYuvSSE2(void) {} + +#endif // WEBP_USE_SSE2 diff --git a/thirdparty/libwebp/src/dec/vp8i_dec.h b/thirdparty/libwebp/src/dec/vp8i_dec.h index 9af22f8cc61..30c1bd3ef9b 100644 --- a/thirdparty/libwebp/src/dec/vp8i_dec.h +++ b/thirdparty/libwebp/src/dec/vp8i_dec.h @@ -32,7 +32,7 @@ extern "C" { // version numbers #define DEC_MAJ_VERSION 1 #define DEC_MIN_VERSION 2 -#define DEC_REV_VERSION 2 +#define DEC_REV_VERSION 4 // YUV-cache parameters. Cache is 32-bytes wide (= one cacheline). // Constraints are: We need to store one 16x16 block of luma samples (y), diff --git a/thirdparty/libwebp/src/dec/vp8l_dec.c b/thirdparty/libwebp/src/dec/vp8l_dec.c index 78db0140300..13480551282 100644 --- a/thirdparty/libwebp/src/dec/vp8l_dec.c +++ b/thirdparty/libwebp/src/dec/vp8l_dec.c @@ -178,7 +178,7 @@ static WEBP_INLINE int PlaneCodeToDistance(int xsize, int plane_code) { //------------------------------------------------------------------------------ // Decodes the next Huffman code from bit-stream. -// FillBitWindow(br) needs to be called at minimum every second call +// VP8LFillBitWindow(br) needs to be called at minimum every second call // to ReadSymbol, in order to pre-fetch enough bits. static WEBP_INLINE int ReadSymbol(const HuffmanCode* table, VP8LBitReader* const br) { @@ -321,7 +321,7 @@ static int ReadHuffmanCode(int alphabet_size, VP8LDecoder* const dec, // The first code is either 1 bit or 8 bit code. int symbol = VP8LReadBits(br, (first_symbol_len_code == 0) ? 1 : 8); code_lengths[symbol] = 1; - // The second code (if present), is always 8 bit long. + // The second code (if present), is always 8 bits long. if (num_symbols == 2) { symbol = VP8LReadBits(br, 8); code_lengths[symbol] = 1; @@ -1281,7 +1281,7 @@ static int ExpandColorMap(int num_colors, VP8LTransform* const transform) { uint8_t* const new_data = (uint8_t*)new_color_map; new_color_map[0] = transform->data_[0]; for (i = 4; i < 4 * num_colors; ++i) { - // Equivalent to AddPixelEq(), on a byte-basis. + // Equivalent to VP8LAddPixels(), on a byte-basis. new_data[i] = (data[i] + new_data[i - 4]) & 0xff; } for (; i < 4 * final_num_colors; ++i) { diff --git a/thirdparty/libwebp/src/demux/demux.c b/thirdparty/libwebp/src/demux/demux.c index f04a2b84502..41387ec2d65 100644 --- a/thirdparty/libwebp/src/demux/demux.c +++ b/thirdparty/libwebp/src/demux/demux.c @@ -25,7 +25,7 @@ #define DMUX_MAJ_VERSION 1 #define DMUX_MIN_VERSION 2 -#define DMUX_REV_VERSION 2 +#define DMUX_REV_VERSION 4 typedef struct { size_t start_; // start location of the data @@ -614,7 +614,6 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) { while (f != NULL) { const int cur_frame_set = f->frame_num_; - int frame_count = 0; // Check frame properties. for (; f != NULL && f->frame_num_ == cur_frame_set; f = f->next_) { @@ -649,8 +648,6 @@ static int IsValidExtendedFormat(const WebPDemuxer* const dmux) { dmux->canvas_width_, dmux->canvas_height_)) { return 0; } - - ++frame_count; } } return 1; diff --git a/thirdparty/libwebp/src/dsp/alpha_processing_neon.c b/thirdparty/libwebp/src/dsp/alpha_processing_neon.c index 9e0ace94211..6716fb77f0d 100644 --- a/thirdparty/libwebp/src/dsp/alpha_processing_neon.c +++ b/thirdparty/libwebp/src/dsp/alpha_processing_neon.c @@ -83,7 +83,7 @@ static void ApplyAlphaMultiply_NEON(uint8_t* rgba, int alpha_first, static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha, int alpha_stride, int width, int height, uint8_t* WEBP_RESTRICT dst, int dst_stride) { - uint32_t alpha_mask = 0xffffffffu; + uint32_t alpha_mask = 0xffu; uint8x8_t mask8 = vdup_n_u8(0xff); uint32_t tmp[2]; int i, j; @@ -107,6 +107,7 @@ static int DispatchAlpha_NEON(const uint8_t* WEBP_RESTRICT alpha, dst += dst_stride; } vst1_u8((uint8_t*)tmp, mask8); + alpha_mask *= 0x01010101; alpha_mask &= tmp[0]; alpha_mask &= tmp[1]; return (alpha_mask != 0xffffffffu); @@ -135,7 +136,7 @@ static void DispatchAlphaToGreen_NEON(const uint8_t* WEBP_RESTRICT alpha, static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride, int width, int height, uint8_t* WEBP_RESTRICT alpha, int alpha_stride) { - uint32_t alpha_mask = 0xffffffffu; + uint32_t alpha_mask = 0xffu; uint8x8_t mask8 = vdup_n_u8(0xff); uint32_t tmp[2]; int i, j; @@ -157,6 +158,7 @@ static int ExtractAlpha_NEON(const uint8_t* WEBP_RESTRICT argb, int argb_stride, alpha += alpha_stride; } vst1_u8((uint8_t*)tmp, mask8); + alpha_mask *= 0x01010101; alpha_mask &= tmp[0]; alpha_mask &= tmp[1]; return (alpha_mask == 0xffffffffu); diff --git a/thirdparty/libwebp/src/dsp/cpu.c b/thirdparty/libwebp/src/dsp/cpu.c index 3145e190a4c..a4ba7f2cb70 100644 --- a/thirdparty/libwebp/src/dsp/cpu.c +++ b/thirdparty/libwebp/src/dsp/cpu.c @@ -11,7 +11,7 @@ // // Author: Christian Duvivier (cduvivier@google.com) -#include "src/dsp/dsp.h" +#include "src/dsp/cpu.h" #if defined(WEBP_HAVE_NEON_RTCD) #include diff --git a/thirdparty/libwebp/src/dsp/cpu.h b/thirdparty/libwebp/src/dsp/cpu.h new file mode 100644 index 00000000000..57a40d87d48 --- /dev/null +++ b/thirdparty/libwebp/src/dsp/cpu.h @@ -0,0 +1,254 @@ +// Copyright 2022 Google Inc. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the COPYING file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// ----------------------------------------------------------------------------- +// +// CPU detection functions and macros. +// +// Author: Skal (pascal.massimino@gmail.com) + +#ifndef WEBP_DSP_CPU_H_ +#define WEBP_DSP_CPU_H_ + +#ifdef HAVE_CONFIG_H +#include "src/webp/config.h" +#endif + +#include "src/webp/types.h" + +#if defined(__GNUC__) +#define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__) +#define LOCAL_GCC_PREREQ(maj, min) (LOCAL_GCC_VERSION >= (((maj) << 8) | (min))) +#else +#define LOCAL_GCC_VERSION 0 +#define LOCAL_GCC_PREREQ(maj, min) 0 +#endif + +#if defined(__clang__) +#define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__) +#define LOCAL_CLANG_PREREQ(maj, min) \ + (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min))) +#else +#define LOCAL_CLANG_VERSION 0 +#define LOCAL_CLANG_PREREQ(maj, min) 0 +#endif + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + +#if !defined(HAVE_CONFIG_H) +#if defined(_MSC_VER) && _MSC_VER > 1310 && \ + (defined(_M_X64) || defined(_M_IX86)) +#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets +#endif + +#if defined(_MSC_VER) && _MSC_VER >= 1500 && \ + (defined(_M_X64) || defined(_M_IX86)) +#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets +#endif +#endif + +// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp +// files without intrinsics, allowing the corresponding Init() to be called. +// Files containing intrinsics will need to be built targeting the instruction +// set so should succeed on one of the earlier tests. +#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \ + (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2)) +#define WEBP_USE_SSE2 +#endif + +#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2) +#define WEBP_HAVE_SSE2 +#endif + +#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \ + (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41)) +#define WEBP_USE_SSE41 +#endif + +#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41) +#define WEBP_HAVE_SSE41 +#endif + +#undef WEBP_MSC_SSE41 +#undef WEBP_MSC_SSE2 + +// The intrinsics currently cause compiler errors with arm-nacl-gcc and the +// inline assembly would need to be modified for use with Native Client. +#if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \ + (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \ + !defined(__native_client__) +#define WEBP_USE_NEON +#endif + +#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \ + defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H) +#define WEBP_ANDROID_NEON // Android targets that may have NEON +#define WEBP_USE_NEON +#endif + +// Note: ARM64 is supported in Visual Studio 2017, but requires the direct +// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in +// arm_neon.h. Compile errors were seen with Visual Studio 2019 16.4 with +// vtbl4_u8(); a fix was made in 16.6. +#if defined(_MSC_VER) && ((_MSC_VER >= 1700 && defined(_M_ARM)) || \ + (_MSC_VER >= 1926 && defined(_M_ARM64))) +#define WEBP_USE_NEON +#define WEBP_USE_INTRINSICS +#endif + +#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON) +#define WEBP_HAVE_NEON +#endif + +#if defined(__mips__) && !defined(__mips64) && defined(__mips_isa_rev) && \ + (__mips_isa_rev >= 1) && (__mips_isa_rev < 6) +#define WEBP_USE_MIPS32 +#if (__mips_isa_rev >= 2) +#define WEBP_USE_MIPS32_R2 +#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2) +#define WEBP_USE_MIPS_DSP_R2 +#endif +#endif +#endif + +#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5) +#define WEBP_USE_MSA +#endif + +#ifndef WEBP_DSP_OMIT_C_CODE +#define WEBP_DSP_OMIT_C_CODE 1 +#endif + +#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE +#define WEBP_NEON_OMIT_C_CODE 1 +#else +#define WEBP_NEON_OMIT_C_CODE 0 +#endif + +#if !(LOCAL_CLANG_PREREQ(3, 8) || LOCAL_GCC_PREREQ(4, 8) || \ + defined(__aarch64__)) +#define WEBP_NEON_WORK_AROUND_GCC 1 +#else +#define WEBP_NEON_WORK_AROUND_GCC 0 +#endif + +// This macro prevents thread_sanitizer from reporting known concurrent writes. +#define WEBP_TSAN_IGNORE_FUNCTION +#if defined(__has_feature) +#if __has_feature(thread_sanitizer) +#undef WEBP_TSAN_IGNORE_FUNCTION +#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread)) +#endif +#endif + +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) +#define WEBP_MSAN +#endif +#endif + +#if defined(WEBP_USE_THREAD) && !defined(_WIN32) +#include // NOLINT + +#define WEBP_DSP_INIT(func) \ + do { \ + static volatile VP8CPUInfo func##_last_cpuinfo_used = \ + (VP8CPUInfo)&func##_last_cpuinfo_used; \ + static pthread_mutex_t func##_lock = PTHREAD_MUTEX_INITIALIZER; \ + if (pthread_mutex_lock(&func##_lock)) break; \ + if (func##_last_cpuinfo_used != VP8GetCPUInfo) func(); \ + func##_last_cpuinfo_used = VP8GetCPUInfo; \ + (void)pthread_mutex_unlock(&func##_lock); \ + } while (0) +#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32)) +#define WEBP_DSP_INIT(func) \ + do { \ + static volatile VP8CPUInfo func##_last_cpuinfo_used = \ + (VP8CPUInfo)&func##_last_cpuinfo_used; \ + if (func##_last_cpuinfo_used == VP8GetCPUInfo) break; \ + func(); \ + func##_last_cpuinfo_used = VP8GetCPUInfo; \ + } while (0) +#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32) + +// Defines an Init + helper function that control multiple initialization of +// function pointers / tables. +/* Usage: + WEBP_DSP_INIT_FUNC(InitFunc) { + ...function body + } +*/ +#define WEBP_DSP_INIT_FUNC(name) \ + static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void); \ + WEBP_TSAN_IGNORE_FUNCTION void name(void) { WEBP_DSP_INIT(name##_body); } \ + static WEBP_TSAN_IGNORE_FUNCTION void name##_body(void) + +#define WEBP_UBSAN_IGNORE_UNDEF +#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW +#if defined(__clang__) && defined(__has_attribute) +#if __has_attribute(no_sanitize) +// This macro prevents the undefined behavior sanitizer from reporting +// failures. This is only meant to silence unaligned loads on platforms that +// are known to support them. +#undef WEBP_UBSAN_IGNORE_UNDEF +#define WEBP_UBSAN_IGNORE_UNDEF __attribute__((no_sanitize("undefined"))) + +// This macro prevents the undefined behavior sanitizer from reporting +// failures related to unsigned integer overflows. This is only meant to +// silence cases where this well defined behavior is expected. +#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW +#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \ + __attribute__((no_sanitize("unsigned-integer-overflow"))) +#endif +#endif + +// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'. +// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning. +#if !defined(WEBP_OFFSET_PTR) +#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off))) +#endif + +// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility) +#if !defined(WEBP_SWAP_16BIT_CSP) +#define WEBP_SWAP_16BIT_CSP 0 +#endif + +// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__) +#if !defined(WORDS_BIGENDIAN) && \ + (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \ + (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) +#define WORDS_BIGENDIAN +#endif + +typedef enum { + kSSE2, + kSSE3, + kSlowSSSE3, // special feature for slow SSSE3 architectures + kSSE4_1, + kAVX, + kAVX2, + kNEON, + kMIPS32, + kMIPSdspR2, + kMSA +} CPUFeature; + +#ifdef __cplusplus +extern "C" { +#endif + +// returns true if the CPU supports the feature. +typedef int (*VP8CPUInfo)(CPUFeature feature); +WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // WEBP_DSP_CPU_H_ diff --git a/thirdparty/libwebp/src/dsp/dsp.h b/thirdparty/libwebp/src/dsp/dsp.h index c4f57e4d5bf..d2000b8efcb 100644 --- a/thirdparty/libwebp/src/dsp/dsp.h +++ b/thirdparty/libwebp/src/dsp/dsp.h @@ -18,6 +18,7 @@ #include "src/webp/config.h" #endif +#include "src/dsp/cpu.h" #include "src/webp/types.h" #ifdef __cplusplus @@ -43,225 +44,6 @@ extern "C" { #define WEBP_RESTRICT #endif -//------------------------------------------------------------------------------ -// CPU detection - -#if defined(__GNUC__) -# define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__) -# define LOCAL_GCC_PREREQ(maj, min) \ - (LOCAL_GCC_VERSION >= (((maj) << 8) | (min))) -#else -# define LOCAL_GCC_VERSION 0 -# define LOCAL_GCC_PREREQ(maj, min) 0 -#endif - -#if defined(__clang__) -# define LOCAL_CLANG_VERSION ((__clang_major__ << 8) | __clang_minor__) -# define LOCAL_CLANG_PREREQ(maj, min) \ - (LOCAL_CLANG_VERSION >= (((maj) << 8) | (min))) -#else -# define LOCAL_CLANG_VERSION 0 -# define LOCAL_CLANG_PREREQ(maj, min) 0 -#endif - -#ifndef __has_builtin -# define __has_builtin(x) 0 -#endif - -#if !defined(HAVE_CONFIG_H) -#if defined(_MSC_VER) && _MSC_VER > 1310 && \ - (defined(_M_X64) || defined(_M_IX86)) -#define WEBP_MSC_SSE2 // Visual C++ SSE2 targets -#endif - -#if defined(_MSC_VER) && _MSC_VER >= 1500 && \ - (defined(_M_X64) || defined(_M_IX86)) -#define WEBP_MSC_SSE41 // Visual C++ SSE4.1 targets -#endif -#endif - -// WEBP_HAVE_* are used to indicate the presence of the instruction set in dsp -// files without intrinsics, allowing the corresponding Init() to be called. -// Files containing intrinsics will need to be built targeting the instruction -// set so should succeed on one of the earlier tests. -#if (defined(__SSE2__) || defined(WEBP_MSC_SSE2)) && \ - (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE2)) -#define WEBP_USE_SSE2 -#endif - -#if defined(WEBP_USE_SSE2) && !defined(WEBP_HAVE_SSE2) -#define WEBP_HAVE_SSE2 -#endif - -#if (defined(__SSE4_1__) || defined(WEBP_MSC_SSE41)) && \ - (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_SSE41)) -#define WEBP_USE_SSE41 -#endif - -#if defined(WEBP_USE_SSE41) && !defined(WEBP_HAVE_SSE41) -#define WEBP_HAVE_SSE41 -#endif - -#undef WEBP_MSC_SSE41 -#undef WEBP_MSC_SSE2 - -// The intrinsics currently cause compiler errors with arm-nacl-gcc and the -// inline assembly would need to be modified for use with Native Client. -#if ((defined(__ARM_NEON__) || defined(__aarch64__)) && \ - (!defined(HAVE_CONFIG_H) || defined(WEBP_HAVE_NEON))) && \ - !defined(__native_client__) -#define WEBP_USE_NEON -#endif - -#if !defined(WEBP_USE_NEON) && defined(__ANDROID__) && \ - defined(__ARM_ARCH_7A__) && defined(HAVE_CPU_FEATURES_H) -#define WEBP_ANDROID_NEON // Android targets that may have NEON -#define WEBP_USE_NEON -#endif - -// Note: ARM64 is supported in Visual Studio 2017, but requires the direct -// inclusion of arm64_neon.h; Visual Studio 2019 includes this file in -// arm_neon.h. -#if defined(_MSC_VER) && \ - ((_MSC_VER >= 1700 && defined(_M_ARM)) || \ - (_MSC_VER >= 1920 && defined(_M_ARM64))) -#define WEBP_USE_NEON -#define WEBP_USE_INTRINSICS -#endif - -#if defined(WEBP_USE_NEON) && !defined(WEBP_HAVE_NEON) -#define WEBP_HAVE_NEON -#endif - -#if defined(__mips__) && !defined(__mips64) && \ - defined(__mips_isa_rev) && (__mips_isa_rev >= 1) && (__mips_isa_rev < 6) -#define WEBP_USE_MIPS32 -#if (__mips_isa_rev >= 2) -#define WEBP_USE_MIPS32_R2 -#if defined(__mips_dspr2) || (defined(__mips_dsp_rev) && __mips_dsp_rev >= 2) -#define WEBP_USE_MIPS_DSP_R2 -#endif -#endif -#endif - -#if defined(__mips_msa) && defined(__mips_isa_rev) && (__mips_isa_rev >= 5) -#define WEBP_USE_MSA -#endif - -#ifndef WEBP_DSP_OMIT_C_CODE -#define WEBP_DSP_OMIT_C_CODE 1 -#endif - -#if defined(WEBP_USE_NEON) && WEBP_DSP_OMIT_C_CODE -#define WEBP_NEON_OMIT_C_CODE 1 -#else -#define WEBP_NEON_OMIT_C_CODE 0 -#endif - -#if !(LOCAL_CLANG_PREREQ(3,8) || LOCAL_GCC_PREREQ(4,8) || defined(__aarch64__)) -#define WEBP_NEON_WORK_AROUND_GCC 1 -#else -#define WEBP_NEON_WORK_AROUND_GCC 0 -#endif - -// This macro prevents thread_sanitizer from reporting known concurrent writes. -#define WEBP_TSAN_IGNORE_FUNCTION -#if defined(__has_feature) -#if __has_feature(thread_sanitizer) -#undef WEBP_TSAN_IGNORE_FUNCTION -#define WEBP_TSAN_IGNORE_FUNCTION __attribute__((no_sanitize_thread)) -#endif -#endif - -#if defined(WEBP_USE_THREAD) && !defined(_WIN32) -#include // NOLINT - -#define WEBP_DSP_INIT(func) do { \ - static volatile VP8CPUInfo func ## _last_cpuinfo_used = \ - (VP8CPUInfo)&func ## _last_cpuinfo_used; \ - static pthread_mutex_t func ## _lock = PTHREAD_MUTEX_INITIALIZER; \ - if (pthread_mutex_lock(&func ## _lock)) break; \ - if (func ## _last_cpuinfo_used != VP8GetCPUInfo) func(); \ - func ## _last_cpuinfo_used = VP8GetCPUInfo; \ - (void)pthread_mutex_unlock(&func ## _lock); \ -} while (0) -#else // !(defined(WEBP_USE_THREAD) && !defined(_WIN32)) -#define WEBP_DSP_INIT(func) do { \ - static volatile VP8CPUInfo func ## _last_cpuinfo_used = \ - (VP8CPUInfo)&func ## _last_cpuinfo_used; \ - if (func ## _last_cpuinfo_used == VP8GetCPUInfo) break; \ - func(); \ - func ## _last_cpuinfo_used = VP8GetCPUInfo; \ -} while (0) -#endif // defined(WEBP_USE_THREAD) && !defined(_WIN32) - -// Defines an Init + helper function that control multiple initialization of -// function pointers / tables. -/* Usage: - WEBP_DSP_INIT_FUNC(InitFunc) { - ...function body - } -*/ -#define WEBP_DSP_INIT_FUNC(name) \ - static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void); \ - WEBP_TSAN_IGNORE_FUNCTION void name(void) { \ - WEBP_DSP_INIT(name ## _body); \ - } \ - static WEBP_TSAN_IGNORE_FUNCTION void name ## _body(void) - -#define WEBP_UBSAN_IGNORE_UNDEF -#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW -#if defined(__clang__) && defined(__has_attribute) -#if __has_attribute(no_sanitize) -// This macro prevents the undefined behavior sanitizer from reporting -// failures. This is only meant to silence unaligned loads on platforms that -// are known to support them. -#undef WEBP_UBSAN_IGNORE_UNDEF -#define WEBP_UBSAN_IGNORE_UNDEF \ - __attribute__((no_sanitize("undefined"))) - -// This macro prevents the undefined behavior sanitizer from reporting -// failures related to unsigned integer overflows. This is only meant to -// silence cases where this well defined behavior is expected. -#undef WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW -#define WEBP_UBSAN_IGNORE_UNSIGNED_OVERFLOW \ - __attribute__((no_sanitize("unsigned-integer-overflow"))) -#endif -#endif - -// If 'ptr' is NULL, returns NULL. Otherwise returns 'ptr + off'. -// Prevents undefined behavior sanitizer nullptr-with-nonzero-offset warning. -#if !defined(WEBP_OFFSET_PTR) -#define WEBP_OFFSET_PTR(ptr, off) (((ptr) == NULL) ? NULL : ((ptr) + (off))) -#endif - -// Regularize the definition of WEBP_SWAP_16BIT_CSP (backward compatibility) -#if !defined(WEBP_SWAP_16BIT_CSP) -#define WEBP_SWAP_16BIT_CSP 0 -#endif - -// some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__) -#if !defined(WORDS_BIGENDIAN) && \ - (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \ - (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) -#define WORDS_BIGENDIAN -#endif - -typedef enum { - kSSE2, - kSSE3, - kSlowSSSE3, // special feature for slow SSSE3 architectures - kSSE4_1, - kAVX, - kAVX2, - kNEON, - kMIPS32, - kMIPSdspR2, - kMSA -} CPUFeature; -// returns true if the CPU supports the feature. -typedef int (*VP8CPUInfo)(CPUFeature feature); -WEBP_EXTERN VP8CPUInfo VP8GetCPUInfo; //------------------------------------------------------------------------------ // Init stub generator @@ -550,15 +332,6 @@ extern void WebPConvertARGBToUV_C(const uint32_t* argb, uint8_t* u, uint8_t* v, extern void WebPConvertRGBA32ToUV_C(const uint16_t* rgb, uint8_t* u, uint8_t* v, int width); -// utilities for accurate RGB->YUV conversion -extern uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* src, const uint16_t* ref, - uint16_t* dst, int len); -extern void (*WebPSharpYUVUpdateRGB)(const int16_t* src, const int16_t* ref, - int16_t* dst, int len); -extern void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, - int len, - const uint16_t* best_y, uint16_t* out); - // Must be called before using the above. void WebPInitConvertARGBToYUV(void); diff --git a/thirdparty/libwebp/src/dsp/lossless.h b/thirdparty/libwebp/src/dsp/lossless.h index c26c6bca07a..de60d95d0b0 100644 --- a/thirdparty/libwebp/src/dsp/lossless.h +++ b/thirdparty/libwebp/src/dsp/lossless.h @@ -182,9 +182,9 @@ extern VP8LPredictorAddSubFunc VP8LPredictorsSub_C[16]; // ----------------------------------------------------------------------------- // Huffman-cost related functions. -typedef double (*VP8LCostFunc)(const uint32_t* population, int length); -typedef double (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y, - int length); +typedef float (*VP8LCostFunc)(const uint32_t* population, int length); +typedef float (*VP8LCostCombinedFunc)(const uint32_t* X, const uint32_t* Y, + int length); typedef float (*VP8LCombinedShannonEntropyFunc)(const int X[256], const int Y[256]); @@ -198,7 +198,7 @@ typedef struct { // small struct to hold counters } VP8LStreaks; typedef struct { // small struct to hold bit entropy results - double entropy; // entropy + float entropy; // entropy uint32_t sum; // sum of the population int nonzeros; // number of non-zero elements in the population uint32_t max_val; // maximum value in the population diff --git a/thirdparty/libwebp/src/dsp/lossless_enc.c b/thirdparty/libwebp/src/dsp/lossless_enc.c index 1580631e387..de6c4ace5f0 100644 --- a/thirdparty/libwebp/src/dsp/lossless_enc.c +++ b/thirdparty/libwebp/src/dsp/lossless_enc.c @@ -402,7 +402,7 @@ static float FastLog2Slow_C(uint32_t v) { // Compute the combined Shanon's entropy for distribution {X} and {X+Y} static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) { int i; - double retval = 0.; + float retval = 0.f; int sumX = 0, sumXY = 0; for (i = 0; i < 256; ++i) { const int x = X[i]; @@ -418,7 +418,7 @@ static float CombinedShannonEntropy_C(const int X[256], const int Y[256]) { } } retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); - return (float)retval; + return retval; } void VP8LBitEntropyInit(VP8LBitEntropy* const entropy) { @@ -636,17 +636,17 @@ void VP8LBundleColorMap_C(const uint8_t* const row, int width, int xbits, //------------------------------------------------------------------------------ -static double ExtraCost_C(const uint32_t* population, int length) { +static float ExtraCost_C(const uint32_t* population, int length) { int i; - double cost = 0.; + float cost = 0.f; for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2]; return cost; } -static double ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y, +static float ExtraCostCombined_C(const uint32_t* X, const uint32_t* Y, int length) { int i; - double cost = 0.; + float cost = 0.f; for (i = 2; i < length - 2; ++i) { const int xy = X[i + 2] + Y[i + 2]; cost += (i >> 1) * xy; diff --git a/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c b/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c index 0412a093cf9..639f7866311 100644 --- a/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c +++ b/thirdparty/libwebp/src/dsp/lossless_enc_mips32.c @@ -103,8 +103,8 @@ static float FastLog2Slow_MIPS32(uint32_t v) { // cost += i * *(pop + 1); // pop += 2; // } -// return (double)cost; -static double ExtraCost_MIPS32(const uint32_t* const population, int length) { +// return (float)cost; +static float ExtraCost_MIPS32(const uint32_t* const population, int length) { int i, temp0, temp1; const uint32_t* pop = &population[4]; const uint32_t* const LoopEnd = &population[length]; @@ -130,7 +130,7 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) { : "memory", "hi", "lo" ); - return (double)((int64_t)temp0 << 32 | temp1); + return (float)((int64_t)temp0 << 32 | temp1); } // C version of this function: @@ -148,9 +148,9 @@ static double ExtraCost_MIPS32(const uint32_t* const population, int length) { // pX += 2; // pY += 2; // } -// return (double)cost; -static double ExtraCostCombined_MIPS32(const uint32_t* const X, - const uint32_t* const Y, int length) { +// return (float)cost; +static float ExtraCostCombined_MIPS32(const uint32_t* const X, + const uint32_t* const Y, int length) { int i, temp0, temp1, temp2, temp3; const uint32_t* pX = &X[4]; const uint32_t* pY = &Y[4]; @@ -183,7 +183,7 @@ static double ExtraCostCombined_MIPS32(const uint32_t* const X, : "memory", "hi", "lo" ); - return (double)((int64_t)temp0 << 32 | temp1); + return (float)((int64_t)temp0 << 32 | temp1); } #define HUFFMAN_COST_PASS \ @@ -347,24 +347,24 @@ static void GetCombinedEntropyUnrefined_MIPS32(const uint32_t X[], static void AddVector_MIPS32(const uint32_t* pa, const uint32_t* pb, uint32_t* pout, int size) { uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; - const uint32_t end = ((size) / 4) * 4; + const int end = ((size) / 4) * 4; const uint32_t* const LoopEnd = pa + end; int i; ASM_START ADD_TO_OUT(0, 4, 8, 12, 1, pa, pb, pout) ASM_END_0 - for (i = end; i < size; ++i) pout[i] = pa[i] + pb[i]; + for (i = 0; i < size - end; ++i) pout[i] = pa[i] + pb[i]; } static void AddVectorEq_MIPS32(const uint32_t* pa, uint32_t* pout, int size) { uint32_t temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7; - const uint32_t end = ((size) / 4) * 4; + const int end = ((size) / 4) * 4; const uint32_t* const LoopEnd = pa + end; int i; ASM_START ADD_TO_OUT(0, 4, 8, 12, 0, pa, pout, pout) ASM_END_1 - for (i = end; i < size; ++i) pout[i] += pa[i]; + for (i = 0; i < size - end; ++i) pout[i] += pa[i]; } #undef ASM_END_1 diff --git a/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c b/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c index b2f83b871ca..948001a3d55 100644 --- a/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c +++ b/thirdparty/libwebp/src/dsp/lossless_enc_sse2.c @@ -239,7 +239,7 @@ static void AddVectorEq_SSE2(const uint32_t* a, uint32_t* out, int size) { static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) { int i; - double retval = 0.; + float retval = 0.f; int sumX = 0, sumXY = 0; const __m128i zero = _mm_setzero_si128(); @@ -273,7 +273,7 @@ static float CombinedShannonEntropy_SSE2(const int X[256], const int Y[256]) { } } retval += VP8LFastSLog2(sumX) + VP8LFastSLog2(sumXY); - return (float)retval; + return retval; } #else diff --git a/thirdparty/libwebp/src/dsp/yuv.c b/thirdparty/libwebp/src/dsp/yuv.c index 48466f8b116..d16c13d3ca3 100644 --- a/thirdparty/libwebp/src/dsp/yuv.c +++ b/thirdparty/libwebp/src/dsp/yuv.c @@ -194,50 +194,6 @@ void WebPConvertRGBA32ToUV_C(const uint16_t* rgb, //----------------------------------------------------------------------------- -#if !WEBP_NEON_OMIT_C_CODE -#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic -static uint16_t clip_y(int v) { - return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v; -} - -static uint64_t SharpYUVUpdateY_C(const uint16_t* ref, const uint16_t* src, - uint16_t* dst, int len) { - uint64_t diff = 0; - int i; - for (i = 0; i < len; ++i) { - const int diff_y = ref[i] - src[i]; - const int new_y = (int)dst[i] + diff_y; - dst[i] = clip_y(new_y); - diff += (uint64_t)abs(diff_y); - } - return diff; -} - -static void SharpYUVUpdateRGB_C(const int16_t* ref, const int16_t* src, - int16_t* dst, int len) { - int i; - for (i = 0; i < len; ++i) { - const int diff_uv = ref[i] - src[i]; - dst[i] += diff_uv; - } -} - -static void SharpYUVFilterRow_C(const int16_t* A, const int16_t* B, int len, - const uint16_t* best_y, uint16_t* out) { - int i; - for (i = 0; i < len; ++i, ++A, ++B) { - const int v0 = (A[0] * 9 + A[1] * 3 + B[0] * 3 + B[1] + 8) >> 4; - const int v1 = (A[1] * 9 + A[0] * 3 + B[1] * 3 + B[0] + 8) >> 4; - out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0); - out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1); - } -} -#endif // !WEBP_NEON_OMIT_C_CODE - -#undef MAX_Y - -//----------------------------------------------------------------------------- - void (*WebPConvertRGB24ToY)(const uint8_t* rgb, uint8_t* y, int width); void (*WebPConvertBGR24ToY)(const uint8_t* bgr, uint8_t* y, int width); void (*WebPConvertRGBA32ToUV)(const uint16_t* rgb, @@ -247,18 +203,9 @@ void (*WebPConvertARGBToY)(const uint32_t* argb, uint8_t* y, int width); void (*WebPConvertARGBToUV)(const uint32_t* argb, uint8_t* u, uint8_t* v, int src_width, int do_store); -uint64_t (*WebPSharpYUVUpdateY)(const uint16_t* ref, const uint16_t* src, - uint16_t* dst, int len); -void (*WebPSharpYUVUpdateRGB)(const int16_t* ref, const int16_t* src, - int16_t* dst, int len); -void (*WebPSharpYUVFilterRow)(const int16_t* A, const int16_t* B, int len, - const uint16_t* best_y, uint16_t* out); - extern void WebPInitConvertARGBToYUVSSE2(void); extern void WebPInitConvertARGBToYUVSSE41(void); extern void WebPInitConvertARGBToYUVNEON(void); -extern void WebPInitSharpYUVSSE2(void); -extern void WebPInitSharpYUVNEON(void); WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) { WebPConvertARGBToY = ConvertARGBToY_C; @@ -269,17 +216,10 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) { WebPConvertRGBA32ToUV = WebPConvertRGBA32ToUV_C; -#if !WEBP_NEON_OMIT_C_CODE - WebPSharpYUVUpdateY = SharpYUVUpdateY_C; - WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_C; - WebPSharpYUVFilterRow = SharpYUVFilterRow_C; -#endif - if (VP8GetCPUInfo != NULL) { #if defined(WEBP_HAVE_SSE2) if (VP8GetCPUInfo(kSSE2)) { WebPInitConvertARGBToYUVSSE2(); - WebPInitSharpYUVSSE2(); } #endif // WEBP_HAVE_SSE2 #if defined(WEBP_HAVE_SSE41) @@ -293,7 +233,6 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) { if (WEBP_NEON_OMIT_C_CODE || (VP8GetCPUInfo != NULL && VP8GetCPUInfo(kNEON))) { WebPInitConvertARGBToYUVNEON(); - WebPInitSharpYUVNEON(); } #endif // WEBP_HAVE_NEON @@ -302,7 +241,4 @@ WEBP_DSP_INIT_FUNC(WebPInitConvertARGBToYUV) { assert(WebPConvertRGB24ToY != NULL); assert(WebPConvertBGR24ToY != NULL); assert(WebPConvertRGBA32ToUV != NULL); - assert(WebPSharpYUVUpdateY != NULL); - assert(WebPSharpYUVUpdateRGB != NULL); - assert(WebPSharpYUVFilterRow != NULL); } diff --git a/thirdparty/libwebp/src/dsp/yuv_neon.c b/thirdparty/libwebp/src/dsp/yuv_neon.c index a34d60248f6..ff77b009801 100644 --- a/thirdparty/libwebp/src/dsp/yuv_neon.c +++ b/thirdparty/libwebp/src/dsp/yuv_neon.c @@ -173,116 +173,8 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVNEON(void) { WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_NEON; } -//------------------------------------------------------------------------------ - -#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic -static uint16_t clip_y_NEON(int v) { - return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v; -} - -static uint64_t SharpYUVUpdateY_NEON(const uint16_t* ref, const uint16_t* src, - uint16_t* dst, int len) { - int i; - const int16x8_t zero = vdupq_n_s16(0); - const int16x8_t max = vdupq_n_s16(MAX_Y); - uint64x2_t sum = vdupq_n_u64(0); - uint64_t diff; - - for (i = 0; i + 8 <= len; i += 8) { - const int16x8_t A = vreinterpretq_s16_u16(vld1q_u16(ref + i)); - const int16x8_t B = vreinterpretq_s16_u16(vld1q_u16(src + i)); - const int16x8_t C = vreinterpretq_s16_u16(vld1q_u16(dst + i)); - const int16x8_t D = vsubq_s16(A, B); // diff_y - const int16x8_t F = vaddq_s16(C, D); // new_y - const uint16x8_t H = - vreinterpretq_u16_s16(vmaxq_s16(vminq_s16(F, max), zero)); - const int16x8_t I = vabsq_s16(D); // abs(diff_y) - vst1q_u16(dst + i, H); - sum = vpadalq_u32(sum, vpaddlq_u16(vreinterpretq_u16_s16(I))); - } - diff = vgetq_lane_u64(sum, 0) + vgetq_lane_u64(sum, 1); - for (; i < len; ++i) { - const int diff_y = ref[i] - src[i]; - const int new_y = (int)(dst[i]) + diff_y; - dst[i] = clip_y_NEON(new_y); - diff += (uint64_t)(abs(diff_y)); - } - return diff; -} - -static void SharpYUVUpdateRGB_NEON(const int16_t* ref, const int16_t* src, - int16_t* dst, int len) { - int i; - for (i = 0; i + 8 <= len; i += 8) { - const int16x8_t A = vld1q_s16(ref + i); - const int16x8_t B = vld1q_s16(src + i); - const int16x8_t C = vld1q_s16(dst + i); - const int16x8_t D = vsubq_s16(A, B); // diff_uv - const int16x8_t E = vaddq_s16(C, D); // new_uv - vst1q_s16(dst + i, E); - } - for (; i < len; ++i) { - const int diff_uv = ref[i] - src[i]; - dst[i] += diff_uv; - } -} - -static void SharpYUVFilterRow_NEON(const int16_t* A, const int16_t* B, int len, - const uint16_t* best_y, uint16_t* out) { - int i; - const int16x8_t max = vdupq_n_s16(MAX_Y); - const int16x8_t zero = vdupq_n_s16(0); - for (i = 0; i + 8 <= len; i += 8) { - const int16x8_t a0 = vld1q_s16(A + i + 0); - const int16x8_t a1 = vld1q_s16(A + i + 1); - const int16x8_t b0 = vld1q_s16(B + i + 0); - const int16x8_t b1 = vld1q_s16(B + i + 1); - const int16x8_t a0b1 = vaddq_s16(a0, b1); - const int16x8_t a1b0 = vaddq_s16(a1, b0); - const int16x8_t a0a1b0b1 = vaddq_s16(a0b1, a1b0); // A0+A1+B0+B1 - const int16x8_t a0b1_2 = vaddq_s16(a0b1, a0b1); // 2*(A0+B1) - const int16x8_t a1b0_2 = vaddq_s16(a1b0, a1b0); // 2*(A1+B0) - const int16x8_t c0 = vshrq_n_s16(vaddq_s16(a0b1_2, a0a1b0b1), 3); - const int16x8_t c1 = vshrq_n_s16(vaddq_s16(a1b0_2, a0a1b0b1), 3); - const int16x8_t d0 = vaddq_s16(c1, a0); - const int16x8_t d1 = vaddq_s16(c0, a1); - const int16x8_t e0 = vrshrq_n_s16(d0, 1); - const int16x8_t e1 = vrshrq_n_s16(d1, 1); - const int16x8x2_t f = vzipq_s16(e0, e1); - const int16x8_t g0 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 0)); - const int16x8_t g1 = vreinterpretq_s16_u16(vld1q_u16(best_y + 2 * i + 8)); - const int16x8_t h0 = vaddq_s16(g0, f.val[0]); - const int16x8_t h1 = vaddq_s16(g1, f.val[1]); - const int16x8_t i0 = vmaxq_s16(vminq_s16(h0, max), zero); - const int16x8_t i1 = vmaxq_s16(vminq_s16(h1, max), zero); - vst1q_u16(out + 2 * i + 0, vreinterpretq_u16_s16(i0)); - vst1q_u16(out + 2 * i + 8, vreinterpretq_u16_s16(i1)); - } - for (; i < len; ++i) { - const int a0b1 = A[i + 0] + B[i + 1]; - const int a1b0 = A[i + 1] + B[i + 0]; - const int a0a1b0b1 = a0b1 + a1b0 + 8; - const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; - const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; - out[2 * i + 0] = clip_y_NEON(best_y[2 * i + 0] + v0); - out[2 * i + 1] = clip_y_NEON(best_y[2 * i + 1] + v1); - } -} -#undef MAX_Y - -//------------------------------------------------------------------------------ - -extern void WebPInitSharpYUVNEON(void); - -WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVNEON(void) { - WebPSharpYUVUpdateY = SharpYUVUpdateY_NEON; - WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_NEON; - WebPSharpYUVFilterRow = SharpYUVFilterRow_NEON; -} - #else // !WEBP_USE_NEON WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVNEON) -WEBP_DSP_INIT_STUB(WebPInitSharpYUVNEON) #endif // WEBP_USE_NEON diff --git a/thirdparty/libwebp/src/dsp/yuv_sse2.c b/thirdparty/libwebp/src/dsp/yuv_sse2.c index baa48d53717..970bbb78840 100644 --- a/thirdparty/libwebp/src/dsp/yuv_sse2.c +++ b/thirdparty/libwebp/src/dsp/yuv_sse2.c @@ -747,128 +747,9 @@ WEBP_TSAN_IGNORE_FUNCTION void WebPInitConvertARGBToYUVSSE2(void) { WebPConvertRGBA32ToUV = ConvertRGBA32ToUV_SSE2; } -//------------------------------------------------------------------------------ - -#define MAX_Y ((1 << 10) - 1) // 10b precision over 16b-arithmetic -static uint16_t clip_y(int v) { - return (v < 0) ? 0 : (v > MAX_Y) ? MAX_Y : (uint16_t)v; -} - -static uint64_t SharpYUVUpdateY_SSE2(const uint16_t* ref, const uint16_t* src, - uint16_t* dst, int len) { - uint64_t diff = 0; - uint32_t tmp[4]; - int i; - const __m128i zero = _mm_setzero_si128(); - const __m128i max = _mm_set1_epi16(MAX_Y); - const __m128i one = _mm_set1_epi16(1); - __m128i sum = zero; - - for (i = 0; i + 8 <= len; i += 8) { - const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i)); - const __m128i B = _mm_loadu_si128((const __m128i*)(src + i)); - const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i)); - const __m128i D = _mm_sub_epi16(A, B); // diff_y - const __m128i E = _mm_cmpgt_epi16(zero, D); // sign (-1 or 0) - const __m128i F = _mm_add_epi16(C, D); // new_y - const __m128i G = _mm_or_si128(E, one); // -1 or 1 - const __m128i H = _mm_max_epi16(_mm_min_epi16(F, max), zero); - const __m128i I = _mm_madd_epi16(D, G); // sum(abs(...)) - _mm_storeu_si128((__m128i*)(dst + i), H); - sum = _mm_add_epi32(sum, I); - } - _mm_storeu_si128((__m128i*)tmp, sum); - diff = tmp[3] + tmp[2] + tmp[1] + tmp[0]; - for (; i < len; ++i) { - const int diff_y = ref[i] - src[i]; - const int new_y = (int)dst[i] + diff_y; - dst[i] = clip_y(new_y); - diff += (uint64_t)abs(diff_y); - } - return diff; -} - -static void SharpYUVUpdateRGB_SSE2(const int16_t* ref, const int16_t* src, - int16_t* dst, int len) { - int i = 0; - for (i = 0; i + 8 <= len; i += 8) { - const __m128i A = _mm_loadu_si128((const __m128i*)(ref + i)); - const __m128i B = _mm_loadu_si128((const __m128i*)(src + i)); - const __m128i C = _mm_loadu_si128((const __m128i*)(dst + i)); - const __m128i D = _mm_sub_epi16(A, B); // diff_uv - const __m128i E = _mm_add_epi16(C, D); // new_uv - _mm_storeu_si128((__m128i*)(dst + i), E); - } - for (; i < len; ++i) { - const int diff_uv = ref[i] - src[i]; - dst[i] += diff_uv; - } -} - -static void SharpYUVFilterRow_SSE2(const int16_t* A, const int16_t* B, int len, - const uint16_t* best_y, uint16_t* out) { - int i; - const __m128i kCst8 = _mm_set1_epi16(8); - const __m128i max = _mm_set1_epi16(MAX_Y); - const __m128i zero = _mm_setzero_si128(); - for (i = 0; i + 8 <= len; i += 8) { - const __m128i a0 = _mm_loadu_si128((const __m128i*)(A + i + 0)); - const __m128i a1 = _mm_loadu_si128((const __m128i*)(A + i + 1)); - const __m128i b0 = _mm_loadu_si128((const __m128i*)(B + i + 0)); - const __m128i b1 = _mm_loadu_si128((const __m128i*)(B + i + 1)); - const __m128i a0b1 = _mm_add_epi16(a0, b1); - const __m128i a1b0 = _mm_add_epi16(a1, b0); - const __m128i a0a1b0b1 = _mm_add_epi16(a0b1, a1b0); // A0+A1+B0+B1 - const __m128i a0a1b0b1_8 = _mm_add_epi16(a0a1b0b1, kCst8); - const __m128i a0b1_2 = _mm_add_epi16(a0b1, a0b1); // 2*(A0+B1) - const __m128i a1b0_2 = _mm_add_epi16(a1b0, a1b0); // 2*(A1+B0) - const __m128i c0 = _mm_srai_epi16(_mm_add_epi16(a0b1_2, a0a1b0b1_8), 3); - const __m128i c1 = _mm_srai_epi16(_mm_add_epi16(a1b0_2, a0a1b0b1_8), 3); - const __m128i d0 = _mm_add_epi16(c1, a0); - const __m128i d1 = _mm_add_epi16(c0, a1); - const __m128i e0 = _mm_srai_epi16(d0, 1); - const __m128i e1 = _mm_srai_epi16(d1, 1); - const __m128i f0 = _mm_unpacklo_epi16(e0, e1); - const __m128i f1 = _mm_unpackhi_epi16(e0, e1); - const __m128i g0 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 0)); - const __m128i g1 = _mm_loadu_si128((const __m128i*)(best_y + 2 * i + 8)); - const __m128i h0 = _mm_add_epi16(g0, f0); - const __m128i h1 = _mm_add_epi16(g1, f1); - const __m128i i0 = _mm_max_epi16(_mm_min_epi16(h0, max), zero); - const __m128i i1 = _mm_max_epi16(_mm_min_epi16(h1, max), zero); - _mm_storeu_si128((__m128i*)(out + 2 * i + 0), i0); - _mm_storeu_si128((__m128i*)(out + 2 * i + 8), i1); - } - for (; i < len; ++i) { - // (9 * A0 + 3 * A1 + 3 * B0 + B1 + 8) >> 4 = - // = (8 * A0 + 2 * (A1 + B0) + (A0 + A1 + B0 + B1 + 8)) >> 4 - // We reuse the common sub-expressions. - const int a0b1 = A[i + 0] + B[i + 1]; - const int a1b0 = A[i + 1] + B[i + 0]; - const int a0a1b0b1 = a0b1 + a1b0 + 8; - const int v0 = (8 * A[i + 0] + 2 * a1b0 + a0a1b0b1) >> 4; - const int v1 = (8 * A[i + 1] + 2 * a0b1 + a0a1b0b1) >> 4; - out[2 * i + 0] = clip_y(best_y[2 * i + 0] + v0); - out[2 * i + 1] = clip_y(best_y[2 * i + 1] + v1); - } -} - -#undef MAX_Y - -//------------------------------------------------------------------------------ - -extern void WebPInitSharpYUVSSE2(void); - -WEBP_TSAN_IGNORE_FUNCTION void WebPInitSharpYUVSSE2(void) { - WebPSharpYUVUpdateY = SharpYUVUpdateY_SSE2; - WebPSharpYUVUpdateRGB = SharpYUVUpdateRGB_SSE2; - WebPSharpYUVFilterRow = SharpYUVFilterRow_SSE2; -} - #else // !WEBP_USE_SSE2 WEBP_DSP_INIT_STUB(WebPInitSamplersSSE2) WEBP_DSP_INIT_STUB(WebPInitConvertARGBToYUVSSE2) -WEBP_DSP_INIT_STUB(WebPInitSharpYUVSSE2) #endif // WEBP_USE_SSE2 diff --git a/thirdparty/libwebp/src/enc/alpha_enc.c b/thirdparty/libwebp/src/enc/alpha_enc.c index 0b54f3e6ec2..f7c02690e3c 100644 --- a/thirdparty/libwebp/src/enc/alpha_enc.c +++ b/thirdparty/libwebp/src/enc/alpha_enc.c @@ -86,7 +86,7 @@ static int EncodeLossless(const uint8_t* const data, int width, int height, // a decoder bug related to alpha with color cache. // See: https://code.google.com/p/webp/issues/detail?id=239 // Need to re-enable this later. - ok = (VP8LEncodeStream(&config, &picture, bw, 0 /*use_cache*/) == VP8_ENC_OK); + ok = VP8LEncodeStream(&config, &picture, bw, /*use_cache=*/0); WebPPictureFree(&picture); ok = ok && !bw->error_; if (!ok) { diff --git a/thirdparty/libwebp/src/enc/backward_references_cost_enc.c b/thirdparty/libwebp/src/enc/backward_references_cost_enc.c index 516abd73eb4..6968ef3c9f3 100644 --- a/thirdparty/libwebp/src/enc/backward_references_cost_enc.c +++ b/thirdparty/libwebp/src/enc/backward_references_cost_enc.c @@ -15,10 +15,11 @@ // #include +#include +#include "src/dsp/lossless_common.h" #include "src/enc/backward_references_enc.h" #include "src/enc/histogram_enc.h" -#include "src/dsp/lossless_common.h" #include "src/utils/color_cache_utils.h" #include "src/utils/utils.h" @@ -30,15 +31,15 @@ extern void VP8LBackwardRefsCursorAdd(VP8LBackwardRefs* const refs, const PixOrCopy v); typedef struct { - double alpha_[VALUES_IN_BYTE]; - double red_[VALUES_IN_BYTE]; - double blue_[VALUES_IN_BYTE]; - double distance_[NUM_DISTANCE_CODES]; - double* literal_; + float alpha_[VALUES_IN_BYTE]; + float red_[VALUES_IN_BYTE]; + float blue_[VALUES_IN_BYTE]; + float distance_[NUM_DISTANCE_CODES]; + float* literal_; } CostModel; static void ConvertPopulationCountTableToBitEstimates( - int num_symbols, const uint32_t population_counts[], double output[]) { + int num_symbols, const uint32_t population_counts[], float output[]) { uint32_t sum = 0; int nonzeros = 0; int i; @@ -51,7 +52,7 @@ static void ConvertPopulationCountTableToBitEstimates( if (nonzeros <= 1) { memset(output, 0, num_symbols * sizeof(*output)); } else { - const double logsum = VP8LFastLog2(sum); + const float logsum = VP8LFastLog2(sum); for (i = 0; i < num_symbols; ++i) { output[i] = logsum - VP8LFastLog2(population_counts[i]); } @@ -75,8 +76,8 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits, } ConvertPopulationCountTableToBitEstimates( - VP8LHistogramNumCodes(histo->palette_code_bits_), - histo->literal_, m->literal_); + VP8LHistogramNumCodes(histo->palette_code_bits_), histo->literal_, + m->literal_); ConvertPopulationCountTableToBitEstimates( VALUES_IN_BYTE, histo->red_, m->red_); ConvertPopulationCountTableToBitEstimates( @@ -92,27 +93,27 @@ static int CostModelBuild(CostModel* const m, int xsize, int cache_bits, return ok; } -static WEBP_INLINE double GetLiteralCost(const CostModel* const m, uint32_t v) { +static WEBP_INLINE float GetLiteralCost(const CostModel* const m, uint32_t v) { return m->alpha_[v >> 24] + m->red_[(v >> 16) & 0xff] + m->literal_[(v >> 8) & 0xff] + m->blue_[v & 0xff]; } -static WEBP_INLINE double GetCacheCost(const CostModel* const m, uint32_t idx) { +static WEBP_INLINE float GetCacheCost(const CostModel* const m, uint32_t idx) { const int literal_idx = VALUES_IN_BYTE + NUM_LENGTH_CODES + idx; return m->literal_[literal_idx]; } -static WEBP_INLINE double GetLengthCost(const CostModel* const m, - uint32_t length) { +static WEBP_INLINE float GetLengthCost(const CostModel* const m, + uint32_t length) { int code, extra_bits; VP8LPrefixEncodeBits(length, &code, &extra_bits); return m->literal_[VALUES_IN_BYTE + code] + extra_bits; } -static WEBP_INLINE double GetDistanceCost(const CostModel* const m, - uint32_t distance) { +static WEBP_INLINE float GetDistanceCost(const CostModel* const m, + uint32_t distance) { int code, extra_bits; VP8LPrefixEncodeBits(distance, &code, &extra_bits); return m->distance_[code] + extra_bits; @@ -122,20 +123,20 @@ static WEBP_INLINE void AddSingleLiteralWithCostModel( const uint32_t* const argb, VP8LColorCache* const hashers, const CostModel* const cost_model, int idx, int use_color_cache, float prev_cost, float* const cost, uint16_t* const dist_array) { - double cost_val = prev_cost; + float cost_val = prev_cost; const uint32_t color = argb[idx]; const int ix = use_color_cache ? VP8LColorCacheContains(hashers, color) : -1; if (ix >= 0) { // use_color_cache is true and hashers contains color - const double mul0 = 0.68; + const float mul0 = 0.68f; cost_val += GetCacheCost(cost_model, ix) * mul0; } else { - const double mul1 = 0.82; + const float mul1 = 0.82f; if (use_color_cache) VP8LColorCacheInsert(hashers, color); cost_val += GetLiteralCost(cost_model, color) * mul1; } if (cost[idx] > cost_val) { - cost[idx] = (float)cost_val; + cost[idx] = cost_val; dist_array[idx] = 1; // only one is inserted. } } @@ -172,7 +173,7 @@ struct CostInterval { // The GetLengthCost(cost_model, k) are cached in a CostCacheInterval. typedef struct { - double cost_; + float cost_; int start_; int end_; // Exclusive. } CostCacheInterval; @@ -187,7 +188,7 @@ typedef struct { int count_; // The number of stored intervals. CostCacheInterval* cache_intervals_; size_t cache_intervals_size_; - double cost_cache_[MAX_LENGTH]; // Contains the GetLengthCost(cost_model, k). + float cost_cache_[MAX_LENGTH]; // Contains the GetLengthCost(cost_model, k). float* costs_; uint16_t* dist_array_; // Most of the time, we only need few intervals -> use a free-list, to avoid @@ -262,10 +263,13 @@ static int CostManagerInit(CostManager* const manager, CostManagerInitFreeList(manager); // Fill in the cost_cache_. - manager->cache_intervals_size_ = 1; - manager->cost_cache_[0] = GetLengthCost(cost_model, 0); - for (i = 1; i < cost_cache_size; ++i) { + // Has to be done in two passes due to a GCC bug on i686 + // related to https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323 + for (i = 0; i < cost_cache_size; ++i) { manager->cost_cache_[i] = GetLengthCost(cost_model, i); + } + manager->cache_intervals_size_ = 1; + for (i = 1; i < cost_cache_size; ++i) { // Get the number of bound intervals. if (manager->cost_cache_[i] != manager->cost_cache_[i - 1]) { ++manager->cache_intervals_size_; @@ -294,7 +298,7 @@ static int CostManagerInit(CostManager* const manager, cur->end_ = 1; cur->cost_ = manager->cost_cache_[0]; for (i = 1; i < cost_cache_size; ++i) { - const double cost_val = manager->cost_cache_[i]; + const float cost_val = manager->cost_cache_[i]; if (cost_val != cur->cost_) { ++cur; // Initialize an interval. @@ -303,6 +307,8 @@ static int CostManagerInit(CostManager* const manager, } cur->end_ = i + 1; } + assert((size_t)(cur - manager->cache_intervals_) + 1 == + manager->cache_intervals_size_); } manager->costs_ = (float*)WebPSafeMalloc(pix_count, sizeof(*manager->costs_)); @@ -311,7 +317,7 @@ static int CostManagerInit(CostManager* const manager, return 0; } // Set the initial costs_ high for every pixel as we will keep the minimum. - for (i = 0; i < pix_count; ++i) manager->costs_[i] = 1e38f; + for (i = 0; i < pix_count; ++i) manager->costs_[i] = FLT_MAX; return 1; } @@ -457,7 +463,7 @@ static WEBP_INLINE void InsertInterval(CostManager* const manager, // If handling the interval or one of its subintervals becomes to heavy, its // contribution is added to the costs right away. static WEBP_INLINE void PushInterval(CostManager* const manager, - double distance_cost, int position, + float distance_cost, int position, int len) { size_t i; CostInterval* interval = manager->head_; @@ -474,7 +480,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager, const int k = j - position; float cost_tmp; assert(k >= 0 && k < MAX_LENGTH); - cost_tmp = (float)(distance_cost + manager->cost_cache_[k]); + cost_tmp = distance_cost + manager->cost_cache_[k]; if (manager->costs_[j] > cost_tmp) { manager->costs_[j] = cost_tmp; @@ -492,7 +498,7 @@ static WEBP_INLINE void PushInterval(CostManager* const manager, const int end = position + (cost_cache_intervals[i].end_ > len ? len : cost_cache_intervals[i].end_); - const float cost = (float)(distance_cost + cost_cache_intervals[i].cost_); + const float cost = distance_cost + cost_cache_intervals[i].cost_; for (; interval != NULL && interval->start_ < end; interval = interval_next) { @@ -570,22 +576,21 @@ static int BackwardReferencesHashChainDistanceOnly( const int pix_count = xsize * ysize; const int use_color_cache = (cache_bits > 0); const size_t literal_array_size = - sizeof(double) * (NUM_LITERAL_CODES + NUM_LENGTH_CODES + - ((cache_bits > 0) ? (1 << cache_bits) : 0)); + sizeof(float) * (VP8LHistogramNumCodes(cache_bits)); const size_t cost_model_size = sizeof(CostModel) + literal_array_size; CostModel* const cost_model = (CostModel*)WebPSafeCalloc(1ULL, cost_model_size); VP8LColorCache hashers; CostManager* cost_manager = - (CostManager*)WebPSafeMalloc(1ULL, sizeof(*cost_manager)); + (CostManager*)WebPSafeCalloc(1ULL, sizeof(*cost_manager)); int offset_prev = -1, len_prev = -1; - double offset_cost = -1; + float offset_cost = -1.f; int first_offset_is_constant = -1; // initialized with 'impossible' value int reach = 0; if (cost_model == NULL || cost_manager == NULL) goto Error; - cost_model->literal_ = (double*)(cost_model + 1); + cost_model->literal_ = (float*)(cost_model + 1); if (use_color_cache) { cc_init = VP8LColorCacheInit(&hashers, cache_bits); if (!cc_init) goto Error; @@ -675,7 +680,7 @@ static int BackwardReferencesHashChainDistanceOnly( } ok = !refs->error_; -Error: + Error: if (cc_init) VP8LColorCacheClear(&hashers); CostManagerClear(cost_manager); WebPSafeFree(cost_model); diff --git a/thirdparty/libwebp/src/enc/backward_references_enc.c b/thirdparty/libwebp/src/enc/backward_references_enc.c index 519b36a0915..49a0fac0345 100644 --- a/thirdparty/libwebp/src/enc/backward_references_enc.c +++ b/thirdparty/libwebp/src/enc/backward_references_enc.c @@ -10,6 +10,8 @@ // Author: Jyrki Alakuijala (jyrki@google.com) // +#include "src/enc/backward_references_enc.h" + #include #include #include @@ -17,10 +19,11 @@ #include "src/dsp/dsp.h" #include "src/dsp/lossless.h" #include "src/dsp/lossless_common.h" -#include "src/enc/backward_references_enc.h" #include "src/enc/histogram_enc.h" +#include "src/enc/vp8i_enc.h" #include "src/utils/color_cache_utils.h" #include "src/utils/utils.h" +#include "src/webp/encode.h" #define MIN_BLOCK_SIZE 256 // minimum block size for backward references @@ -255,10 +258,13 @@ static WEBP_INLINE int MaxFindCopyLength(int len) { int VP8LHashChainFill(VP8LHashChain* const p, int quality, const uint32_t* const argb, int xsize, int ysize, - int low_effort) { + int low_effort, const WebPPicture* const pic, + int percent_range, int* const percent) { const int size = xsize * ysize; const int iter_max = GetMaxItersForQuality(quality); const uint32_t window_size = GetWindowSizeForHashChain(quality, xsize); + int remaining_percent = percent_range; + int percent_start = *percent; int pos; int argb_comp; uint32_t base_position; @@ -276,7 +282,13 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality, hash_to_first_index = (int32_t*)WebPSafeMalloc(HASH_SIZE, sizeof(*hash_to_first_index)); - if (hash_to_first_index == NULL) return 0; + if (hash_to_first_index == NULL) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + return 0; + } + + percent_range = remaining_percent / 2; + remaining_percent -= percent_range; // Set the int32_t array to -1. memset(hash_to_first_index, 0xff, HASH_SIZE * sizeof(*hash_to_first_index)); @@ -323,12 +335,22 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality, hash_to_first_index[hash_code] = pos++; argb_comp = argb_comp_next; } + + if (!WebPReportProgress( + pic, percent_start + percent_range * pos / (size - 2), percent)) { + WebPSafeFree(hash_to_first_index); + return 0; + } } // Process the penultimate pixel. chain[pos] = hash_to_first_index[GetPixPairHash64(argb + pos)]; WebPSafeFree(hash_to_first_index); + percent_start += percent_range; + if (!WebPReportProgress(pic, percent_start, percent)) return 0; + percent_range = remaining_percent; + // Find the best match interval at each pixel, defined by an offset to the // pixel and a length. The right-most pixel cannot match anything to the right // (hence a best length of 0) and the left-most pixel nothing to the left @@ -417,8 +439,17 @@ int VP8LHashChainFill(VP8LHashChain* const p, int quality, max_base_position = base_position; } } + + if (!WebPReportProgress(pic, + percent_start + percent_range * + (size - 2 - base_position) / + (size - 2), + percent)) { + return 0; + } } - return 1; + + return WebPReportProgress(pic, percent_start + percent_range, percent); } static WEBP_INLINE void AddSingleLiteral(uint32_t pixel, int use_color_cache, @@ -728,7 +759,7 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality, int* const best_cache_bits) { int i; const int cache_bits_max = (quality <= 25) ? 0 : *best_cache_bits; - double entropy_min = MAX_ENTROPY; + float entropy_min = MAX_ENTROPY; int cc_init[MAX_COLOR_CACHE_BITS + 1] = { 0 }; VP8LColorCache hashers[MAX_COLOR_CACHE_BITS + 1]; VP8LRefsCursor c = VP8LRefsCursorInit(refs); @@ -813,14 +844,14 @@ static int CalculateBestCacheSize(const uint32_t* argb, int quality, } for (i = 0; i <= cache_bits_max; ++i) { - const double entropy = VP8LHistogramEstimateBits(histos[i]); + const float entropy = VP8LHistogramEstimateBits(histos[i]); if (i == 0 || entropy < entropy_min) { entropy_min = entropy; *best_cache_bits = i; } } ok = 1; -Error: + Error: for (i = 0; i <= cache_bits_max; ++i) { if (cc_init[i]) VP8LColorCacheClear(&hashers[i]); VP8LFreeHistogram(histos[i]); @@ -890,7 +921,7 @@ static int GetBackwardReferences(int width, int height, int i, lz77_type; // Index 0 is for a color cache, index 1 for no cache (if needed). int lz77_types_best[2] = {0, 0}; - double bit_costs_best[2] = {DBL_MAX, DBL_MAX}; + float bit_costs_best[2] = {FLT_MAX, FLT_MAX}; VP8LHashChain hash_chain_box; VP8LBackwardRefs* const refs_tmp = &refs[do_no_cache ? 2 : 1]; int status = 0; @@ -902,7 +933,7 @@ static int GetBackwardReferences(int width, int height, for (lz77_type = 1; lz77_types_to_try; lz77_types_to_try &= ~lz77_type, lz77_type <<= 1) { int res = 0; - double bit_cost = 0.; + float bit_cost = 0.f; if ((lz77_types_to_try & lz77_type) == 0) continue; switch (lz77_type) { case kLZ77RLE: @@ -976,15 +1007,16 @@ static int GetBackwardReferences(int width, int height, const VP8LHashChain* const hash_chain_tmp = (lz77_types_best[i] == kLZ77Standard) ? hash_chain : &hash_chain_box; const int cache_bits = (i == 1) ? 0 : *cache_bits_best; - if (VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits, - hash_chain_tmp, &refs[i], - refs_tmp)) { - double bit_cost_trace; - VP8LHistogramCreate(histo, refs_tmp, cache_bits); - bit_cost_trace = VP8LHistogramEstimateBits(histo); - if (bit_cost_trace < bit_costs_best[i]) { - BackwardRefsSwap(refs_tmp, &refs[i]); - } + float bit_cost_trace; + if (!VP8LBackwardReferencesTraceBackwards(width, height, argb, cache_bits, + hash_chain_tmp, &refs[i], + refs_tmp)) { + goto Error; + } + VP8LHistogramCreate(histo, refs_tmp, cache_bits); + bit_cost_trace = VP8LHistogramEstimateBits(histo); + if (bit_cost_trace < bit_costs_best[i]) { + BackwardRefsSwap(refs_tmp, &refs[i]); } } @@ -1000,31 +1032,37 @@ static int GetBackwardReferences(int width, int height, } status = 1; -Error: + Error: VP8LHashChainClear(&hash_chain_box); VP8LFreeHistogram(histo); return status; } -WebPEncodingError VP8LGetBackwardReferences( +int VP8LGetBackwardReferences( int width, int height, const uint32_t* const argb, int quality, int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache, const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs, - int* const cache_bits_best) { + int* const cache_bits_best, const WebPPicture* const pic, int percent_range, + int* const percent) { if (low_effort) { VP8LBackwardRefs* refs_best; *cache_bits_best = cache_bits_max; refs_best = GetBackwardReferencesLowEffort( width, height, argb, cache_bits_best, hash_chain, refs); - if (refs_best == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; + if (refs_best == NULL) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + return 0; + } // Set it in first position. BackwardRefsSwap(refs_best, &refs[0]); } else { if (!GetBackwardReferences(width, height, argb, quality, lz77_types_to_try, cache_bits_max, do_no_cache, hash_chain, refs, cache_bits_best)) { - return VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + return 0; } } - return VP8_ENC_OK; + + return WebPReportProgress(pic, *percent + percent_range, percent); } diff --git a/thirdparty/libwebp/src/enc/backward_references_enc.h b/thirdparty/libwebp/src/enc/backward_references_enc.h index 4c0267b41e9..4dff1c27b57 100644 --- a/thirdparty/libwebp/src/enc/backward_references_enc.h +++ b/thirdparty/libwebp/src/enc/backward_references_enc.h @@ -134,10 +134,11 @@ struct VP8LHashChain { // Must be called first, to set size. int VP8LHashChainInit(VP8LHashChain* const p, int size); -// Pre-compute the best matches for argb. +// Pre-compute the best matches for argb. pic and percent are for progress. int VP8LHashChainFill(VP8LHashChain* const p, int quality, const uint32_t* const argb, int xsize, int ysize, - int low_effort); + int low_effort, const WebPPicture* const pic, + int percent_range, int* const percent); void VP8LHashChainClear(VP8LHashChain* const p); // release memory static WEBP_INLINE int VP8LHashChainFindOffset(const VP8LHashChain* const p, @@ -227,11 +228,14 @@ enum VP8LLZ77Type { // VP8LBackwardRefs is put in the first element, the best value with no-cache in // the second element. // In both cases, the last element is used as temporary internally. -WebPEncodingError VP8LGetBackwardReferences( +// pic and percent are for progress. +// Returns false in case of error (stored in pic->error_code). +int VP8LGetBackwardReferences( int width, int height, const uint32_t* const argb, int quality, int low_effort, int lz77_types_to_try, int cache_bits_max, int do_no_cache, const VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs, - int* const cache_bits_best); + int* const cache_bits_best, const WebPPicture* const pic, int percent_range, + int* const percent); #ifdef __cplusplus } diff --git a/thirdparty/libwebp/src/enc/histogram_enc.c b/thirdparty/libwebp/src/enc/histogram_enc.c index 38a0cebcab1..8418def2e10 100644 --- a/thirdparty/libwebp/src/enc/histogram_enc.c +++ b/thirdparty/libwebp/src/enc/histogram_enc.c @@ -13,15 +13,17 @@ #include "src/webp/config.h" #endif +#include #include -#include "src/enc/backward_references_enc.h" -#include "src/enc/histogram_enc.h" #include "src/dsp/lossless.h" #include "src/dsp/lossless_common.h" +#include "src/enc/backward_references_enc.h" +#include "src/enc/histogram_enc.h" +#include "src/enc/vp8i_enc.h" #include "src/utils/utils.h" -#define MAX_COST 1.e38 +#define MAX_BIT_COST FLT_MAX // Number of partitions for the three dominant (literal, red and blue) symbol // costs. @@ -228,8 +230,8 @@ void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo, // ----------------------------------------------------------------------------- // Entropy-related functions. -static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) { - double mix; +static WEBP_INLINE float BitsEntropyRefine(const VP8LBitEntropy* entropy) { + float mix; if (entropy->nonzeros < 5) { if (entropy->nonzeros <= 1) { return 0; @@ -238,67 +240,67 @@ static WEBP_INLINE double BitsEntropyRefine(const VP8LBitEntropy* entropy) { // Let's mix in a bit of entropy to favor good clustering when // distributions of these are combined. if (entropy->nonzeros == 2) { - return 0.99 * entropy->sum + 0.01 * entropy->entropy; + return 0.99f * entropy->sum + 0.01f * entropy->entropy; } // No matter what the entropy says, we cannot be better than min_limit // with Huffman coding. I am mixing a bit of entropy into the // min_limit since it produces much better (~0.5 %) compression results // perhaps because of better entropy clustering. if (entropy->nonzeros == 3) { - mix = 0.95; + mix = 0.95f; } else { - mix = 0.7; // nonzeros == 4. + mix = 0.7f; // nonzeros == 4. } } else { - mix = 0.627; + mix = 0.627f; } { - double min_limit = 2 * entropy->sum - entropy->max_val; - min_limit = mix * min_limit + (1.0 - mix) * entropy->entropy; + float min_limit = 2.f * entropy->sum - entropy->max_val; + min_limit = mix * min_limit + (1.f - mix) * entropy->entropy; return (entropy->entropy < min_limit) ? min_limit : entropy->entropy; } } -double VP8LBitsEntropy(const uint32_t* const array, int n) { +float VP8LBitsEntropy(const uint32_t* const array, int n) { VP8LBitEntropy entropy; VP8LBitsEntropyUnrefined(array, n, &entropy); return BitsEntropyRefine(&entropy); } -static double InitialHuffmanCost(void) { +static float InitialHuffmanCost(void) { // Small bias because Huffman code length is typically not stored in // full length. static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3; - static const double kSmallBias = 9.1; + static const float kSmallBias = 9.1f; return kHuffmanCodeOfHuffmanCodeSize - kSmallBias; } // Finalize the Huffman cost based on streak numbers and length type (<3 or >=3) -static double FinalHuffmanCost(const VP8LStreaks* const stats) { +static float FinalHuffmanCost(const VP8LStreaks* const stats) { // The constants in this function are experimental and got rounded from // their original values in 1/8 when switched to 1/1024. - double retval = InitialHuffmanCost(); + float retval = InitialHuffmanCost(); // Second coefficient: Many zeros in the histogram are covered efficiently // by a run-length encode. Originally 2/8. - retval += stats->counts[0] * 1.5625 + 0.234375 * stats->streaks[0][1]; + retval += stats->counts[0] * 1.5625f + 0.234375f * stats->streaks[0][1]; // Second coefficient: Constant values are encoded less efficiently, but still // RLE'ed. Originally 6/8. - retval += stats->counts[1] * 2.578125 + 0.703125 * stats->streaks[1][1]; + retval += stats->counts[1] * 2.578125f + 0.703125f * stats->streaks[1][1]; // 0s are usually encoded more efficiently than non-0s. // Originally 15/8. - retval += 1.796875 * stats->streaks[0][0]; + retval += 1.796875f * stats->streaks[0][0]; // Originally 26/8. - retval += 3.28125 * stats->streaks[1][0]; + retval += 3.28125f * stats->streaks[1][0]; return retval; } // Get the symbol entropy for the distribution 'population'. // Set 'trivial_sym', if there's only one symbol present in the distribution. -static double PopulationCost(const uint32_t* const population, int length, - uint32_t* const trivial_sym, - uint8_t* const is_used) { +static float PopulationCost(const uint32_t* const population, int length, + uint32_t* const trivial_sym, + uint8_t* const is_used) { VP8LBitEntropy bit_entropy; VP8LStreaks stats; VP8LGetEntropyUnrefined(population, length, &bit_entropy, &stats); @@ -314,11 +316,10 @@ static double PopulationCost(const uint32_t* const population, int length, // trivial_at_end is 1 if the two histograms only have one element that is // non-zero: both the zero-th one, or both the last one. -static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X, - const uint32_t* const Y, - int length, int is_X_used, - int is_Y_used, - int trivial_at_end) { +static WEBP_INLINE float GetCombinedEntropy(const uint32_t* const X, + const uint32_t* const Y, int length, + int is_X_used, int is_Y_used, + int trivial_at_end) { VP8LStreaks stats; if (trivial_at_end) { // This configuration is due to palettization that transforms an indexed @@ -356,7 +357,7 @@ static WEBP_INLINE double GetCombinedEntropy(const uint32_t* const X, } // Estimates the Entropy + Huffman + other block overhead size cost. -double VP8LHistogramEstimateBits(VP8LHistogram* const p) { +float VP8LHistogramEstimateBits(VP8LHistogram* const p) { return PopulationCost(p->literal_, VP8LHistogramNumCodes(p->palette_code_bits_), NULL, &p->is_used_[0]) @@ -373,8 +374,7 @@ double VP8LHistogramEstimateBits(VP8LHistogram* const p) { static int GetCombinedHistogramEntropy(const VP8LHistogram* const a, const VP8LHistogram* const b, - double cost_threshold, - double* cost) { + float cost_threshold, float* cost) { const int palette_code_bits = a->palette_code_bits_; int trivial_at_end = 0; assert(a->palette_code_bits_ == b->palette_code_bits_); @@ -439,12 +439,11 @@ static WEBP_INLINE void HistogramAdd(const VP8LHistogram* const a, // Since the previous score passed is 'cost_threshold', we only need to compare // the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out // early. -static double HistogramAddEval(const VP8LHistogram* const a, - const VP8LHistogram* const b, - VP8LHistogram* const out, - double cost_threshold) { - double cost = 0; - const double sum_cost = a->bit_cost_ + b->bit_cost_; +static float HistogramAddEval(const VP8LHistogram* const a, + const VP8LHistogram* const b, + VP8LHistogram* const out, float cost_threshold) { + float cost = 0; + const float sum_cost = a->bit_cost_ + b->bit_cost_; cost_threshold += sum_cost; if (GetCombinedHistogramEntropy(a, b, cost_threshold, &cost)) { @@ -459,10 +458,10 @@ static double HistogramAddEval(const VP8LHistogram* const a, // Same as HistogramAddEval(), except that the resulting histogram // is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit // the term C(b) which is constant over all the evaluations. -static double HistogramAddThresh(const VP8LHistogram* const a, - const VP8LHistogram* const b, - double cost_threshold) { - double cost; +static float HistogramAddThresh(const VP8LHistogram* const a, + const VP8LHistogram* const b, + float cost_threshold) { + float cost; assert(a != NULL && b != NULL); cost = -a->bit_cost_; GetCombinedHistogramEntropy(a, b, cost_threshold, &cost); @@ -473,24 +472,22 @@ static double HistogramAddThresh(const VP8LHistogram* const a, // The structure to keep track of cost range for the three dominant entropy // symbols. -// TODO(skal): Evaluate if float can be used here instead of double for -// representing the entropy costs. typedef struct { - double literal_max_; - double literal_min_; - double red_max_; - double red_min_; - double blue_max_; - double blue_min_; + float literal_max_; + float literal_min_; + float red_max_; + float red_min_; + float blue_max_; + float blue_min_; } DominantCostRange; static void DominantCostRangeInit(DominantCostRange* const c) { c->literal_max_ = 0.; - c->literal_min_ = MAX_COST; + c->literal_min_ = MAX_BIT_COST; c->red_max_ = 0.; - c->red_min_ = MAX_COST; + c->red_min_ = MAX_BIT_COST; c->blue_max_ = 0.; - c->blue_min_ = MAX_COST; + c->blue_min_ = MAX_BIT_COST; } static void UpdateDominantCostRange( @@ -505,10 +502,9 @@ static void UpdateDominantCostRange( static void UpdateHistogramCost(VP8LHistogram* const h) { uint32_t alpha_sym, red_sym, blue_sym; - const double alpha_cost = - PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym, - &h->is_used_[3]); - const double distance_cost = + const float alpha_cost = + PopulationCost(h->alpha_, NUM_LITERAL_CODES, &alpha_sym, &h->is_used_[3]); + const float distance_cost = PopulationCost(h->distance_, NUM_DISTANCE_CODES, NULL, &h->is_used_[4]) + VP8LExtraCost(h->distance_, NUM_DISTANCE_CODES); const int num_codes = VP8LHistogramNumCodes(h->palette_code_bits_); @@ -529,10 +525,10 @@ static void UpdateHistogramCost(VP8LHistogram* const h) { } } -static int GetBinIdForEntropy(double min, double max, double val) { - const double range = max - min; +static int GetBinIdForEntropy(float min, float max, float val) { + const float range = max - min; if (range > 0.) { - const double delta = val - min; + const float delta = val - min; return (int)((NUM_PARTITIONS - 1e-6) * delta / range); } else { return 0; @@ -641,15 +637,11 @@ static void HistogramAnalyzeEntropyBin(VP8LHistogramSet* const image_histo, // Merges some histograms with same bin_id together if it's advantageous. // Sets the remaining histograms to NULL. -static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo, - int* num_used, - const uint16_t* const clusters, - uint16_t* const cluster_mappings, - VP8LHistogram* cur_combo, - const uint16_t* const bin_map, - int num_bins, - double combine_cost_factor, - int low_effort) { +static void HistogramCombineEntropyBin( + VP8LHistogramSet* const image_histo, int* num_used, + const uint16_t* const clusters, uint16_t* const cluster_mappings, + VP8LHistogram* cur_combo, const uint16_t* const bin_map, int num_bins, + float combine_cost_factor, int low_effort) { VP8LHistogram** const histograms = image_histo->histograms; int idx; struct { @@ -679,11 +671,10 @@ static void HistogramCombineEntropyBin(VP8LHistogramSet* const image_histo, cluster_mappings[clusters[idx]] = clusters[first]; } else { // try to merge #idx into #first (both share the same bin_id) - const double bit_cost = histograms[idx]->bit_cost_; - const double bit_cost_thresh = -bit_cost * combine_cost_factor; - const double curr_cost_diff = - HistogramAddEval(histograms[first], histograms[idx], - cur_combo, bit_cost_thresh); + const float bit_cost = histograms[idx]->bit_cost_; + const float bit_cost_thresh = -bit_cost * combine_cost_factor; + const float curr_cost_diff = HistogramAddEval( + histograms[first], histograms[idx], cur_combo, bit_cost_thresh); if (curr_cost_diff < bit_cost_thresh) { // Try to merge two histograms only if the combo is a trivial one or // the two candidate histograms are already non-trivial. @@ -731,8 +722,8 @@ static uint32_t MyRand(uint32_t* const seed) { typedef struct { int idx1; int idx2; - double cost_diff; - double cost_combo; + float cost_diff; + float cost_combo; } HistogramPair; typedef struct { @@ -787,10 +778,9 @@ static void HistoQueueUpdateHead(HistoQueue* const histo_queue, // Update the cost diff and combo of a pair of histograms. This needs to be // called when the the histograms have been merged with a third one. static void HistoQueueUpdatePair(const VP8LHistogram* const h1, - const VP8LHistogram* const h2, - double threshold, + const VP8LHistogram* const h2, float threshold, HistogramPair* const pair) { - const double sum_cost = h1->bit_cost_ + h2->bit_cost_; + const float sum_cost = h1->bit_cost_ + h2->bit_cost_; pair->cost_combo = 0.; GetCombinedHistogramEntropy(h1, h2, sum_cost + threshold, &pair->cost_combo); pair->cost_diff = pair->cost_combo - sum_cost; @@ -799,9 +789,9 @@ static void HistoQueueUpdatePair(const VP8LHistogram* const h1, // Create a pair from indices "idx1" and "idx2" provided its cost // is inferior to "threshold", a negative entropy. // It returns the cost of the pair, or 0. if it superior to threshold. -static double HistoQueuePush(HistoQueue* const histo_queue, - VP8LHistogram** const histograms, int idx1, - int idx2, double threshold) { +static float HistoQueuePush(HistoQueue* const histo_queue, + VP8LHistogram** const histograms, int idx1, + int idx2, float threshold) { const VP8LHistogram* h1; const VP8LHistogram* h2; HistogramPair pair; @@ -945,8 +935,8 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo, ++tries_with_no_success < num_tries_no_success; ++iter) { int* mapping_index; - double best_cost = - (histo_queue.size == 0) ? 0. : histo_queue.queue[0].cost_diff; + float best_cost = + (histo_queue.size == 0) ? 0.f : histo_queue.queue[0].cost_diff; int best_idx1 = -1, best_idx2 = 1; const uint32_t rand_range = (*num_used - 1) * (*num_used); // (*num_used) / 2 was chosen empirically. Less means faster but worse @@ -955,7 +945,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo, // Pick random samples. for (j = 0; *num_used >= 2 && j < num_tries; ++j) { - double curr_cost; + float curr_cost; // Choose two different histograms at random and try to combine them. const uint32_t tmp = MyRand(&seed) % rand_range; uint32_t idx1 = tmp / (*num_used - 1); @@ -1034,7 +1024,7 @@ static int HistogramCombineStochastic(VP8LHistogramSet* const image_histo, *do_greedy = (*num_used <= min_cluster_size); ok = 1; -End: + End: HistoQueueClear(&histo_queue); WebPSafeFree(mappings); return ok; @@ -1057,7 +1047,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in, if (out_size > 1) { for (i = 0; i < in_size; ++i) { int best_out = 0; - double best_bits = MAX_COST; + float best_bits = MAX_BIT_COST; int k; if (in_histo[i] == NULL) { // Arbitrarily set to the previous value if unused to help future LZ77. @@ -1065,7 +1055,7 @@ static void HistogramRemap(const VP8LHistogramSet* const in, continue; } for (k = 0; k < out_size; ++k) { - double cur_bits; + float cur_bits; cur_bits = HistogramAddThresh(out_histo[k], in_histo[i], best_bits); if (k == 0 || cur_bits < best_bits) { best_bits = cur_bits; @@ -1093,13 +1083,13 @@ static void HistogramRemap(const VP8LHistogramSet* const in, } } -static double GetCombineCostFactor(int histo_size, int quality) { - double combine_cost_factor = 0.16; +static float GetCombineCostFactor(int histo_size, int quality) { + float combine_cost_factor = 0.16f; if (quality < 90) { - if (histo_size > 256) combine_cost_factor /= 2.; - if (histo_size > 512) combine_cost_factor /= 2.; - if (histo_size > 1024) combine_cost_factor /= 2.; - if (quality <= 50) combine_cost_factor /= 2.; + if (histo_size > 256) combine_cost_factor /= 2.f; + if (histo_size > 512) combine_cost_factor /= 2.f; + if (histo_size > 1024) combine_cost_factor /= 2.f; + if (quality <= 50) combine_cost_factor /= 2.f; } return combine_cost_factor; } @@ -1169,13 +1159,13 @@ static void RemoveEmptyHistograms(VP8LHistogramSet* const image_histo) { } int VP8LGetHistoImageSymbols(int xsize, int ysize, - const VP8LBackwardRefs* const refs, - int quality, int low_effort, - int histogram_bits, int cache_bits, + const VP8LBackwardRefs* const refs, int quality, + int low_effort, int histogram_bits, int cache_bits, VP8LHistogramSet* const image_histo, VP8LHistogram* const tmp_histo, - uint16_t* const histogram_symbols) { - int ok = 0; + uint16_t* const histogram_symbols, + const WebPPicture* const pic, int percent_range, + int* const percent) { const int histo_xsize = histogram_bits ? VP8LSubSampleSize(xsize, histogram_bits) : 1; const int histo_ysize = @@ -1192,7 +1182,10 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, WebPSafeMalloc(2 * image_histo_raw_size, sizeof(map_tmp)); uint16_t* const cluster_mappings = map_tmp + image_histo_raw_size; int num_used = image_histo_raw_size; - if (orig_histo == NULL || map_tmp == NULL) goto Error; + if (orig_histo == NULL || map_tmp == NULL) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + goto Error; + } // Construct the histograms from backward references. HistogramBuild(xsize, histogram_bits, refs, orig_histo); @@ -1206,16 +1199,15 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, if (entropy_combine) { uint16_t* const bin_map = map_tmp; - const double combine_cost_factor = + const float combine_cost_factor = GetCombineCostFactor(image_histo_raw_size, quality); const uint32_t num_clusters = num_used; HistogramAnalyzeEntropyBin(image_histo, bin_map, low_effort); // Collapse histograms with similar entropy. - HistogramCombineEntropyBin(image_histo, &num_used, histogram_symbols, - cluster_mappings, tmp_histo, bin_map, - entropy_combine_num_bins, combine_cost_factor, - low_effort); + HistogramCombineEntropyBin( + image_histo, &num_used, histogram_symbols, cluster_mappings, tmp_histo, + bin_map, entropy_combine_num_bins, combine_cost_factor, low_effort); OptimizeHistogramSymbols(image_histo, cluster_mappings, num_clusters, map_tmp, histogram_symbols); } @@ -1229,11 +1221,13 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, int do_greedy; if (!HistogramCombineStochastic(image_histo, &num_used, threshold_size, &do_greedy)) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } if (do_greedy) { RemoveEmptyHistograms(image_histo); if (!HistogramCombineGreedy(image_histo, &num_used)) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } } @@ -1243,10 +1237,12 @@ int VP8LGetHistoImageSymbols(int xsize, int ysize, RemoveEmptyHistograms(image_histo); HistogramRemap(orig_histo, image_histo, histogram_symbols); - ok = 1; + if (!WebPReportProgress(pic, *percent + percent_range, percent)) { + goto Error; + } Error: VP8LFreeHistogramSet(orig_histo); WebPSafeFree(map_tmp); - return ok; + return (pic->error_code == VP8_ENC_OK); } diff --git a/thirdparty/libwebp/src/enc/histogram_enc.h b/thirdparty/libwebp/src/enc/histogram_enc.h index c3428b5d554..4c0bb97464d 100644 --- a/thirdparty/libwebp/src/enc/histogram_enc.h +++ b/thirdparty/libwebp/src/enc/histogram_enc.h @@ -40,10 +40,10 @@ typedef struct { int palette_code_bits_; uint32_t trivial_symbol_; // True, if histograms for Red, Blue & Alpha // literal symbols are single valued. - double bit_cost_; // cached value of bit cost. - double literal_cost_; // Cached values of dominant entropy costs: - double red_cost_; // literal, red & blue. - double blue_cost_; + float bit_cost_; // cached value of bit cost. + float literal_cost_; // Cached values of dominant entropy costs: + float red_cost_; // literal, red & blue. + float blue_cost_; uint8_t is_used_[5]; // 5 for literal, red, blue, alpha, distance } VP8LHistogram; @@ -105,21 +105,23 @@ static WEBP_INLINE int VP8LHistogramNumCodes(int palette_code_bits) { ((palette_code_bits > 0) ? (1 << palette_code_bits) : 0); } -// Builds the histogram image. +// Builds the histogram image. pic and percent are for progress. +// Returns false in case of error (stored in pic->error_code). int VP8LGetHistoImageSymbols(int xsize, int ysize, - const VP8LBackwardRefs* const refs, - int quality, int low_effort, - int histogram_bits, int cache_bits, + const VP8LBackwardRefs* const refs, int quality, + int low_effort, int histogram_bits, int cache_bits, VP8LHistogramSet* const image_histo, VP8LHistogram* const tmp_histo, - uint16_t* const histogram_symbols); + uint16_t* const histogram_symbols, + const WebPPicture* const pic, int percent_range, + int* const percent); // Returns the entropy for the symbols in the input array. -double VP8LBitsEntropy(const uint32_t* const array, int n); +float VP8LBitsEntropy(const uint32_t* const array, int n); // Estimate how many bits the combined entropy of literals and distance // approximately maps to. -double VP8LHistogramEstimateBits(VP8LHistogram* const p); +float VP8LHistogramEstimateBits(VP8LHistogram* const p); #ifdef __cplusplus } diff --git a/thirdparty/libwebp/src/enc/picture_csp_enc.c b/thirdparty/libwebp/src/enc/picture_csp_enc.c index 35eede96355..fabebcf2026 100644 --- a/thirdparty/libwebp/src/enc/picture_csp_enc.c +++ b/thirdparty/libwebp/src/enc/picture_csp_enc.c @@ -15,12 +15,19 @@ #include #include +#include "sharpyuv/sharpyuv.h" +#include "sharpyuv/sharpyuv_csp.h" #include "src/enc/vp8i_enc.h" #include "src/utils/random_utils.h" #include "src/utils/utils.h" #include "src/dsp/dsp.h" #include "src/dsp/lossless.h" #include "src/dsp/yuv.h" +#include "src/dsp/cpu.h" + +#if defined(WEBP_USE_THREAD) && !defined(_WIN32) +#include +#endif // Uncomment to disable gamma-compression during RGB->U/V averaging #define USE_GAMMA_COMPRESSION @@ -76,16 +83,16 @@ int WebPPictureHasTransparency(const WebPPicture* picture) { #if defined(USE_GAMMA_COMPRESSION) -// gamma-compensates loss of resolution during chroma subsampling -#define kGamma 0.80 // for now we use a different gamma value than kGammaF -#define kGammaFix 12 // fixed-point precision for linear values -#define kGammaScale ((1 << kGammaFix) - 1) -#define kGammaTabFix 7 // fixed-point fractional bits precision -#define kGammaTabScale (1 << kGammaTabFix) -#define kGammaTabRounder (kGammaTabScale >> 1) -#define kGammaTabSize (1 << (kGammaFix - kGammaTabFix)) +// Gamma correction compensates loss of resolution during chroma subsampling. +#define GAMMA_FIX 12 // fixed-point precision for linear values +#define GAMMA_TAB_FIX 7 // fixed-point fractional bits precision +#define GAMMA_TAB_SIZE (1 << (GAMMA_FIX - GAMMA_TAB_FIX)) +static const double kGamma = 0.80; +static const int kGammaScale = ((1 << GAMMA_FIX) - 1); +static const int kGammaTabScale = (1 << GAMMA_TAB_FIX); +static const int kGammaTabRounder = (1 << GAMMA_TAB_FIX >> 1); -static int kLinearToGammaTab[kGammaTabSize + 1]; +static int kLinearToGammaTab[GAMMA_TAB_SIZE + 1]; static uint16_t kGammaToLinearTab[256]; static volatile int kGammaTablesOk = 0; static void InitGammaTables(void); @@ -93,13 +100,13 @@ static void InitGammaTables(void); WEBP_DSP_INIT_FUNC(InitGammaTables) { if (!kGammaTablesOk) { int v; - const double scale = (double)(1 << kGammaTabFix) / kGammaScale; + const double scale = (double)(1 << GAMMA_TAB_FIX) / kGammaScale; const double norm = 1. / 255.; for (v = 0; v <= 255; ++v) { kGammaToLinearTab[v] = (uint16_t)(pow(norm * v, kGamma) * kGammaScale + .5); } - for (v = 0; v <= kGammaTabSize; ++v) { + for (v = 0; v <= GAMMA_TAB_SIZE; ++v) { kLinearToGammaTab[v] = (int)(255. * pow(scale * v, 1. / kGamma) + .5); } kGammaTablesOk = 1; @@ -111,12 +118,12 @@ static WEBP_INLINE uint32_t GammaToLinear(uint8_t v) { } static WEBP_INLINE int Interpolate(int v) { - const int tab_pos = v >> (kGammaTabFix + 2); // integer part + const int tab_pos = v >> (GAMMA_TAB_FIX + 2); // integer part const int x = v & ((kGammaTabScale << 2) - 1); // fractional part const int v0 = kLinearToGammaTab[tab_pos]; const int v1 = kLinearToGammaTab[tab_pos + 1]; const int y = v1 * x + v0 * ((kGammaTabScale << 2) - x); // interpolate - assert(tab_pos + 1 < kGammaTabSize + 1); + assert(tab_pos + 1 < GAMMA_TAB_SIZE + 1); return y; } @@ -124,7 +131,7 @@ static WEBP_INLINE int Interpolate(int v) { // U/V value, suitable for RGBToU/V calls. static WEBP_INLINE int LinearToGamma(uint32_t base_value, int shift) { const int y = Interpolate(base_value << shift); // final uplifted value - return (y + kGammaTabRounder) >> kGammaTabFix; // descale + return (y + kGammaTabRounder) >> GAMMA_TAB_FIX; // descale } #else @@ -158,415 +165,41 @@ static int RGBToV(int r, int g, int b, VP8Random* const rg) { //------------------------------------------------------------------------------ // Sharp RGB->YUV conversion -static const int kNumIterations = 4; static const int kMinDimensionIterativeConversion = 4; -// We could use SFIX=0 and only uint8_t for fixed_y_t, but it produces some -// banding sometimes. Better use extra precision. -#define SFIX 2 // fixed-point precision of RGB and Y/W -typedef int16_t fixed_t; // signed type with extra SFIX precision for UV -typedef uint16_t fixed_y_t; // unsigned type with extra SFIX precision for W - -#define SHALF (1 << SFIX >> 1) -#define MAX_Y_T ((256 << SFIX) - 1) -#define SROUNDER (1 << (YUV_FIX + SFIX - 1)) - -#if defined(USE_GAMMA_COMPRESSION) - -// We use tables of different size and precision for the Rec709 / BT2020 -// transfer function. -#define kGammaF (1./0.45) -static uint32_t kLinearToGammaTabS[kGammaTabSize + 2]; -#define GAMMA_TO_LINEAR_BITS 14 -static uint32_t kGammaToLinearTabS[MAX_Y_T + 1]; // size scales with Y_FIX -static volatile int kGammaTablesSOk = 0; -static void InitGammaTablesS(void); - -WEBP_DSP_INIT_FUNC(InitGammaTablesS) { - assert(2 * GAMMA_TO_LINEAR_BITS < 32); // we use uint32_t intermediate values - if (!kGammaTablesSOk) { - int v; - const double norm = 1. / MAX_Y_T; - const double scale = 1. / kGammaTabSize; - const double a = 0.09929682680944; - const double thresh = 0.018053968510807; - const double final_scale = 1 << GAMMA_TO_LINEAR_BITS; - for (v = 0; v <= MAX_Y_T; ++v) { - const double g = norm * v; - double value; - if (g <= thresh * 4.5) { - value = g / 4.5; - } else { - const double a_rec = 1. / (1. + a); - value = pow(a_rec * (g + a), kGammaF); - } - kGammaToLinearTabS[v] = (uint32_t)(value * final_scale + .5); - } - for (v = 0; v <= kGammaTabSize; ++v) { - const double g = scale * v; - double value; - if (g <= thresh) { - value = 4.5 * g; - } else { - value = (1. + a) * pow(g, 1. / kGammaF) - a; - } - // we already incorporate the 1/2 rounding constant here - kLinearToGammaTabS[v] = - (uint32_t)(MAX_Y_T * value) + (1 << GAMMA_TO_LINEAR_BITS >> 1); - } - // to prevent small rounding errors to cause read-overflow: - kLinearToGammaTabS[kGammaTabSize + 1] = kLinearToGammaTabS[kGammaTabSize]; - kGammaTablesSOk = 1; - } -} - -// return value has a fixed-point precision of GAMMA_TO_LINEAR_BITS -static WEBP_INLINE uint32_t GammaToLinearS(int v) { - return kGammaToLinearTabS[v]; -} - -static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) { - // 'value' is in GAMMA_TO_LINEAR_BITS fractional precision - const uint32_t v = value * kGammaTabSize; - const uint32_t tab_pos = v >> GAMMA_TO_LINEAR_BITS; - // fractional part, in GAMMA_TO_LINEAR_BITS fixed-point precision - const uint32_t x = v - (tab_pos << GAMMA_TO_LINEAR_BITS); // fractional part - // v0 / v1 are in GAMMA_TO_LINEAR_BITS fixed-point precision (range [0..1]) - const uint32_t v0 = kLinearToGammaTabS[tab_pos + 0]; - const uint32_t v1 = kLinearToGammaTabS[tab_pos + 1]; - // Final interpolation. Note that rounding is already included. - const uint32_t v2 = (v1 - v0) * x; // note: v1 >= v0. - const uint32_t result = v0 + (v2 >> GAMMA_TO_LINEAR_BITS); - return result; -} - -#else - -static void InitGammaTablesS(void) {} -static WEBP_INLINE uint32_t GammaToLinearS(int v) { - return (v << GAMMA_TO_LINEAR_BITS) / MAX_Y_T; -} -static WEBP_INLINE uint32_t LinearToGammaS(uint32_t value) { - return (MAX_Y_T * value) >> GAMMA_TO_LINEAR_BITS; -} - -#endif // USE_GAMMA_COMPRESSION - -//------------------------------------------------------------------------------ - -static uint8_t clip_8b(fixed_t v) { - return (!(v & ~0xff)) ? (uint8_t)v : (v < 0) ? 0u : 255u; -} - -static fixed_y_t clip_y(int y) { - return (!(y & ~MAX_Y_T)) ? (fixed_y_t)y : (y < 0) ? 0 : MAX_Y_T; -} - -//------------------------------------------------------------------------------ - -static int RGBToGray(int r, int g, int b) { - const int luma = 13933 * r + 46871 * g + 4732 * b + YUV_HALF; - return (luma >> YUV_FIX); -} - -static uint32_t ScaleDown(int a, int b, int c, int d) { - const uint32_t A = GammaToLinearS(a); - const uint32_t B = GammaToLinearS(b); - const uint32_t C = GammaToLinearS(c); - const uint32_t D = GammaToLinearS(d); - return LinearToGammaS((A + B + C + D + 2) >> 2); -} - -static WEBP_INLINE void UpdateW(const fixed_y_t* src, fixed_y_t* dst, int w) { - int i; - for (i = 0; i < w; ++i) { - const uint32_t R = GammaToLinearS(src[0 * w + i]); - const uint32_t G = GammaToLinearS(src[1 * w + i]); - const uint32_t B = GammaToLinearS(src[2 * w + i]); - const uint32_t Y = RGBToGray(R, G, B); - dst[i] = (fixed_y_t)LinearToGammaS(Y); - } -} - -static void UpdateChroma(const fixed_y_t* src1, const fixed_y_t* src2, - fixed_t* dst, int uv_w) { - int i; - for (i = 0; i < uv_w; ++i) { - const int r = ScaleDown(src1[0 * uv_w + 0], src1[0 * uv_w + 1], - src2[0 * uv_w + 0], src2[0 * uv_w + 1]); - const int g = ScaleDown(src1[2 * uv_w + 0], src1[2 * uv_w + 1], - src2[2 * uv_w + 0], src2[2 * uv_w + 1]); - const int b = ScaleDown(src1[4 * uv_w + 0], src1[4 * uv_w + 1], - src2[4 * uv_w + 0], src2[4 * uv_w + 1]); - const int W = RGBToGray(r, g, b); - dst[0 * uv_w] = (fixed_t)(r - W); - dst[1 * uv_w] = (fixed_t)(g - W); - dst[2 * uv_w] = (fixed_t)(b - W); - dst += 1; - src1 += 2; - src2 += 2; - } -} - -static void StoreGray(const fixed_y_t* rgb, fixed_y_t* y, int w) { - int i; - for (i = 0; i < w; ++i) { - y[i] = RGBToGray(rgb[0 * w + i], rgb[1 * w + i], rgb[2 * w + i]); - } -} - -//------------------------------------------------------------------------------ - -static WEBP_INLINE fixed_y_t Filter2(int A, int B, int W0) { - const int v0 = (A * 3 + B + 2) >> 2; - return clip_y(v0 + W0); -} - -//------------------------------------------------------------------------------ - -static WEBP_INLINE fixed_y_t UpLift(uint8_t a) { // 8bit -> SFIX - return ((fixed_y_t)a << SFIX) | SHALF; -} - -static void ImportOneRow(const uint8_t* const r_ptr, - const uint8_t* const g_ptr, - const uint8_t* const b_ptr, - int step, - int pic_width, - fixed_y_t* const dst) { - int i; - const int w = (pic_width + 1) & ~1; - for (i = 0; i < pic_width; ++i) { - const int off = i * step; - dst[i + 0 * w] = UpLift(r_ptr[off]); - dst[i + 1 * w] = UpLift(g_ptr[off]); - dst[i + 2 * w] = UpLift(b_ptr[off]); - } - if (pic_width & 1) { // replicate rightmost pixel - dst[pic_width + 0 * w] = dst[pic_width + 0 * w - 1]; - dst[pic_width + 1 * w] = dst[pic_width + 1 * w - 1]; - dst[pic_width + 2 * w] = dst[pic_width + 2 * w - 1]; - } -} - -static void InterpolateTwoRows(const fixed_y_t* const best_y, - const fixed_t* prev_uv, - const fixed_t* cur_uv, - const fixed_t* next_uv, - int w, - fixed_y_t* out1, - fixed_y_t* out2) { - const int uv_w = w >> 1; - const int len = (w - 1) >> 1; // length to filter - int k = 3; - while (k-- > 0) { // process each R/G/B segments in turn - // special boundary case for i==0 - out1[0] = Filter2(cur_uv[0], prev_uv[0], best_y[0]); - out2[0] = Filter2(cur_uv[0], next_uv[0], best_y[w]); - - WebPSharpYUVFilterRow(cur_uv, prev_uv, len, best_y + 0 + 1, out1 + 1); - WebPSharpYUVFilterRow(cur_uv, next_uv, len, best_y + w + 1, out2 + 1); - - // special boundary case for i == w - 1 when w is even - if (!(w & 1)) { - out1[w - 1] = Filter2(cur_uv[uv_w - 1], prev_uv[uv_w - 1], - best_y[w - 1 + 0]); - out2[w - 1] = Filter2(cur_uv[uv_w - 1], next_uv[uv_w - 1], - best_y[w - 1 + w]); - } - out1 += w; - out2 += w; - prev_uv += uv_w; - cur_uv += uv_w; - next_uv += uv_w; - } -} - -static WEBP_INLINE uint8_t ConvertRGBToY(int r, int g, int b) { - const int luma = 16839 * r + 33059 * g + 6420 * b + SROUNDER; - return clip_8b(16 + (luma >> (YUV_FIX + SFIX))); -} - -static WEBP_INLINE uint8_t ConvertRGBToU(int r, int g, int b) { - const int u = -9719 * r - 19081 * g + 28800 * b + SROUNDER; - return clip_8b(128 + (u >> (YUV_FIX + SFIX))); -} - -static WEBP_INLINE uint8_t ConvertRGBToV(int r, int g, int b) { - const int v = +28800 * r - 24116 * g - 4684 * b + SROUNDER; - return clip_8b(128 + (v >> (YUV_FIX + SFIX))); -} - -static int ConvertWRGBToYUV(const fixed_y_t* best_y, const fixed_t* best_uv, - WebPPicture* const picture) { - int i, j; - uint8_t* dst_y = picture->y; - uint8_t* dst_u = picture->u; - uint8_t* dst_v = picture->v; - const fixed_t* const best_uv_base = best_uv; - const int w = (picture->width + 1) & ~1; - const int h = (picture->height + 1) & ~1; - const int uv_w = w >> 1; - const int uv_h = h >> 1; - for (best_uv = best_uv_base, j = 0; j < picture->height; ++j) { - for (i = 0; i < picture->width; ++i) { - const int off = (i >> 1); - const int W = best_y[i]; - const int r = best_uv[off + 0 * uv_w] + W; - const int g = best_uv[off + 1 * uv_w] + W; - const int b = best_uv[off + 2 * uv_w] + W; - dst_y[i] = ConvertRGBToY(r, g, b); - } - best_y += w; - best_uv += (j & 1) * 3 * uv_w; - dst_y += picture->y_stride; - } - for (best_uv = best_uv_base, j = 0; j < uv_h; ++j) { - for (i = 0; i < uv_w; ++i) { - const int off = i; - const int r = best_uv[off + 0 * uv_w]; - const int g = best_uv[off + 1 * uv_w]; - const int b = best_uv[off + 2 * uv_w]; - dst_u[i] = ConvertRGBToU(r, g, b); - dst_v[i] = ConvertRGBToV(r, g, b); - } - best_uv += 3 * uv_w; - dst_u += picture->uv_stride; - dst_v += picture->uv_stride; - } - return 1; -} - //------------------------------------------------------------------------------ // Main function -#define SAFE_ALLOC(W, H, T) ((T*)WebPSafeMalloc((W) * (H), sizeof(T))) +extern void SharpYuvInit(VP8CPUInfo cpu_info_func); + +static void SafeInitSharpYuv(void) { +#if defined(WEBP_USE_THREAD) && !defined(_WIN32) + static pthread_mutex_t initsharpyuv_lock = PTHREAD_MUTEX_INITIALIZER; + if (pthread_mutex_lock(&initsharpyuv_lock)) return; +#endif + + SharpYuvInit(VP8GetCPUInfo); + +#if defined(WEBP_USE_THREAD) && !defined(_WIN32) + (void)pthread_mutex_unlock(&initsharpyuv_lock); +#endif +} static int PreprocessARGB(const uint8_t* r_ptr, const uint8_t* g_ptr, const uint8_t* b_ptr, int step, int rgb_stride, WebPPicture* const picture) { - // we expand the right/bottom border if needed - const int w = (picture->width + 1) & ~1; - const int h = (picture->height + 1) & ~1; - const int uv_w = w >> 1; - const int uv_h = h >> 1; - uint64_t prev_diff_y_sum = ~0; - int j, iter; - - // TODO(skal): allocate one big memory chunk. But for now, it's easier - // for valgrind debugging to have several chunks. - fixed_y_t* const tmp_buffer = SAFE_ALLOC(w * 3, 2, fixed_y_t); // scratch - fixed_y_t* const best_y_base = SAFE_ALLOC(w, h, fixed_y_t); - fixed_y_t* const target_y_base = SAFE_ALLOC(w, h, fixed_y_t); - fixed_y_t* const best_rgb_y = SAFE_ALLOC(w, 2, fixed_y_t); - fixed_t* const best_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); - fixed_t* const target_uv_base = SAFE_ALLOC(uv_w * 3, uv_h, fixed_t); - fixed_t* const best_rgb_uv = SAFE_ALLOC(uv_w * 3, 1, fixed_t); - fixed_y_t* best_y = best_y_base; - fixed_y_t* target_y = target_y_base; - fixed_t* best_uv = best_uv_base; - fixed_t* target_uv = target_uv_base; - const uint64_t diff_y_threshold = (uint64_t)(3.0 * w * h); - int ok; - - if (best_y_base == NULL || best_uv_base == NULL || - target_y_base == NULL || target_uv_base == NULL || - best_rgb_y == NULL || best_rgb_uv == NULL || - tmp_buffer == NULL) { - ok = WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); - goto End; + const int ok = SharpYuvConvert( + r_ptr, g_ptr, b_ptr, step, rgb_stride, /*rgb_bit_depth=*/8, + picture->y, picture->y_stride, picture->u, picture->uv_stride, picture->v, + picture->uv_stride, /*yuv_bit_depth=*/8, picture->width, + picture->height, SharpYuvGetConversionMatrix(kSharpYuvMatrixWebp)); + if (!ok) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); } - assert(picture->width >= kMinDimensionIterativeConversion); - assert(picture->height >= kMinDimensionIterativeConversion); - - WebPInitConvertARGBToYUV(); - - // Import RGB samples to W/RGB representation. - for (j = 0; j < picture->height; j += 2) { - const int is_last_row = (j == picture->height - 1); - fixed_y_t* const src1 = tmp_buffer + 0 * w; - fixed_y_t* const src2 = tmp_buffer + 3 * w; - - // prepare two rows of input - ImportOneRow(r_ptr, g_ptr, b_ptr, step, picture->width, src1); - if (!is_last_row) { - ImportOneRow(r_ptr + rgb_stride, g_ptr + rgb_stride, b_ptr + rgb_stride, - step, picture->width, src2); - } else { - memcpy(src2, src1, 3 * w * sizeof(*src2)); - } - StoreGray(src1, best_y + 0, w); - StoreGray(src2, best_y + w, w); - - UpdateW(src1, target_y, w); - UpdateW(src2, target_y + w, w); - UpdateChroma(src1, src2, target_uv, uv_w); - memcpy(best_uv, target_uv, 3 * uv_w * sizeof(*best_uv)); - best_y += 2 * w; - best_uv += 3 * uv_w; - target_y += 2 * w; - target_uv += 3 * uv_w; - r_ptr += 2 * rgb_stride; - g_ptr += 2 * rgb_stride; - b_ptr += 2 * rgb_stride; - } - - // Iterate and resolve clipping conflicts. - for (iter = 0; iter < kNumIterations; ++iter) { - const fixed_t* cur_uv = best_uv_base; - const fixed_t* prev_uv = best_uv_base; - uint64_t diff_y_sum = 0; - - best_y = best_y_base; - best_uv = best_uv_base; - target_y = target_y_base; - target_uv = target_uv_base; - for (j = 0; j < h; j += 2) { - fixed_y_t* const src1 = tmp_buffer + 0 * w; - fixed_y_t* const src2 = tmp_buffer + 3 * w; - { - const fixed_t* const next_uv = cur_uv + ((j < h - 2) ? 3 * uv_w : 0); - InterpolateTwoRows(best_y, prev_uv, cur_uv, next_uv, w, src1, src2); - prev_uv = cur_uv; - cur_uv = next_uv; - } - - UpdateW(src1, best_rgb_y + 0 * w, w); - UpdateW(src2, best_rgb_y + 1 * w, w); - UpdateChroma(src1, src2, best_rgb_uv, uv_w); - - // update two rows of Y and one row of RGB - diff_y_sum += WebPSharpYUVUpdateY(target_y, best_rgb_y, best_y, 2 * w); - WebPSharpYUVUpdateRGB(target_uv, best_rgb_uv, best_uv, 3 * uv_w); - - best_y += 2 * w; - best_uv += 3 * uv_w; - target_y += 2 * w; - target_uv += 3 * uv_w; - } - // test exit condition - if (iter > 0) { - if (diff_y_sum < diff_y_threshold) break; - if (diff_y_sum > prev_diff_y_sum) break; - } - prev_diff_y_sum = diff_y_sum; - } - // final reconstruction - ok = ConvertWRGBToYUV(best_y_base, best_uv_base, picture); - - End: - WebPSafeFree(best_y_base); - WebPSafeFree(best_uv_base); - WebPSafeFree(target_y_base); - WebPSafeFree(target_uv_base); - WebPSafeFree(best_rgb_y); - WebPSafeFree(best_rgb_uv); - WebPSafeFree(tmp_buffer); return ok; } -#undef SAFE_ALLOC //------------------------------------------------------------------------------ // "Fast" regular RGB->YUV @@ -591,8 +224,8 @@ static const int kAlphaFix = 19; // and constant are adjusted very tightly to fit 32b arithmetic. // In particular, they use the fact that the operands for 'v / a' are actually // derived as v = (a0.p0 + a1.p1 + a2.p2 + a3.p3) and a = a0 + a1 + a2 + a3 -// with ai in [0..255] and pi in [0..1<width, picture->height)) return 0; + if (!WebPPictureAllocARGB(picture)) return 0; picture->use_argb = 1; // Convert @@ -1106,6 +748,8 @@ static int Import(WebPPicture* const picture, const int width = picture->width; const int height = picture->height; + if (abs(rgb_stride) < (import_alpha ? 4 : 3) * width) return 0; + if (!picture->use_argb) { const uint8_t* a_ptr = import_alpha ? rgb + 3 : NULL; return ImportYUVAFromRGBA(r_ptr, g_ptr, b_ptr, a_ptr, step, rgb_stride, @@ -1163,24 +807,24 @@ static int Import(WebPPicture* const picture, #if !defined(WEBP_REDUCE_CSP) int WebPPictureImportBGR(WebPPicture* picture, - const uint8_t* rgb, int rgb_stride) { - return (picture != NULL && rgb != NULL) - ? Import(picture, rgb, rgb_stride, 3, 1, 0) + const uint8_t* bgr, int bgr_stride) { + return (picture != NULL && bgr != NULL) + ? Import(picture, bgr, bgr_stride, 3, 1, 0) : 0; } int WebPPictureImportBGRA(WebPPicture* picture, - const uint8_t* rgba, int rgba_stride) { - return (picture != NULL && rgba != NULL) - ? Import(picture, rgba, rgba_stride, 4, 1, 1) + const uint8_t* bgra, int bgra_stride) { + return (picture != NULL && bgra != NULL) + ? Import(picture, bgra, bgra_stride, 4, 1, 1) : 0; } int WebPPictureImportBGRX(WebPPicture* picture, - const uint8_t* rgba, int rgba_stride) { - return (picture != NULL && rgba != NULL) - ? Import(picture, rgba, rgba_stride, 4, 1, 0) + const uint8_t* bgrx, int bgrx_stride) { + return (picture != NULL && bgrx != NULL) + ? Import(picture, bgrx, bgrx_stride, 4, 1, 0) : 0; } @@ -1201,9 +845,9 @@ int WebPPictureImportRGBA(WebPPicture* picture, } int WebPPictureImportRGBX(WebPPicture* picture, - const uint8_t* rgba, int rgba_stride) { - return (picture != NULL && rgba != NULL) - ? Import(picture, rgba, rgba_stride, 4, 0, 0) + const uint8_t* rgbx, int rgbx_stride) { + return (picture != NULL && rgbx != NULL) + ? Import(picture, rgbx, rgbx_stride, 4, 0, 0) : 0; } diff --git a/thirdparty/libwebp/src/enc/picture_enc.c b/thirdparty/libwebp/src/enc/picture_enc.c index c691622d03c..3af6383d38d 100644 --- a/thirdparty/libwebp/src/enc/picture_enc.c +++ b/thirdparty/libwebp/src/enc/picture_enc.c @@ -45,6 +45,22 @@ int WebPPictureInitInternal(WebPPicture* picture, int version) { //------------------------------------------------------------------------------ +int WebPValidatePicture(const WebPPicture* const picture) { + if (picture == NULL) return 0; + if (picture->width <= 0 || picture->height <= 0) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); + } + if (picture->width <= 0 || picture->width / 4 > INT_MAX / 4 || + picture->height <= 0 || picture->height / 4 > INT_MAX / 4) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); + } + if (picture->colorspace != WEBP_YUV420 && + picture->colorspace != WEBP_YUV420A) { + return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); + } + return 1; +} + static void WebPPictureResetBufferARGB(WebPPicture* const picture) { picture->memory_argb_ = NULL; picture->argb = NULL; @@ -63,18 +79,17 @@ void WebPPictureResetBuffers(WebPPicture* const picture) { WebPPictureResetBufferYUVA(picture); } -int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) { +int WebPPictureAllocARGB(WebPPicture* const picture) { void* memory; + const int width = picture->width; + const int height = picture->height; const uint64_t argb_size = (uint64_t)width * height; - assert(picture != NULL); + if (!WebPValidatePicture(picture)) return 0; WebPSafeFree(picture->memory_argb_); WebPPictureResetBufferARGB(picture); - if (width <= 0 || height <= 0) { - return WebPEncodingSetError(picture, VP8_ENC_ERROR_BAD_DIMENSION); - } // allocate a new buffer. memory = WebPSafeMalloc(argb_size + WEBP_ALIGN_CST, sizeof(*picture->argb)); if (memory == NULL) { @@ -86,10 +101,10 @@ int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height) { return 1; } -int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) { - const WebPEncCSP uv_csp = - (WebPEncCSP)((int)picture->colorspace & WEBP_CSP_UV_MASK); +int WebPPictureAllocYUVA(WebPPicture* const picture) { const int has_alpha = (int)picture->colorspace & WEBP_CSP_ALPHA_BIT; + const int width = picture->width; + const int height = picture->height; const int y_stride = width; const int uv_width = (int)(((int64_t)width + 1) >> 1); const int uv_height = (int)(((int64_t)height + 1) >> 1); @@ -98,15 +113,11 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) { uint64_t y_size, uv_size, a_size, total_size; uint8_t* mem; - assert(picture != NULL); + if (!WebPValidatePicture(picture)) return 0; WebPSafeFree(picture->memory_); WebPPictureResetBufferYUVA(picture); - if (uv_csp != WEBP_YUV420) { - return WebPEncodingSetError(picture, VP8_ENC_ERROR_INVALID_CONFIGURATION); - } - // alpha a_width = has_alpha ? width : 0; a_stride = a_width; @@ -152,15 +163,12 @@ int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height) { int WebPPictureAlloc(WebPPicture* picture) { if (picture != NULL) { - const int width = picture->width; - const int height = picture->height; - WebPPictureFree(picture); // erase previous buffer if (!picture->use_argb) { - return WebPPictureAllocYUVA(picture, width, height); + return WebPPictureAllocYUVA(picture); } else { - return WebPPictureAllocARGB(picture, width, height); + return WebPPictureAllocARGB(picture); } } return 1; diff --git a/thirdparty/libwebp/src/enc/picture_rescale_enc.c b/thirdparty/libwebp/src/enc/picture_rescale_enc.c index a75f5d9c06c..839f91cacc0 100644 --- a/thirdparty/libwebp/src/enc/picture_rescale_enc.c +++ b/thirdparty/libwebp/src/enc/picture_rescale_enc.c @@ -13,14 +13,15 @@ #include "src/webp/encode.h" -#if !defined(WEBP_REDUCE_SIZE) - #include #include #include "src/enc/vp8i_enc.h" + +#if !defined(WEBP_REDUCE_SIZE) #include "src/utils/rescaler_utils.h" #include "src/utils/utils.h" +#endif // !defined(WEBP_REDUCE_SIZE) #define HALVE(x) (((x) + 1) >> 1) @@ -56,6 +57,7 @@ static int AdjustAndCheckRectangle(const WebPPicture* const pic, return 1; } +#if !defined(WEBP_REDUCE_SIZE) int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) { if (src == NULL || dst == NULL) return 0; if (src == dst) return 1; @@ -81,6 +83,7 @@ int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) { } return 1; } +#endif // !defined(WEBP_REDUCE_SIZE) int WebPPictureIsView(const WebPPicture* picture) { if (picture == NULL) return 0; @@ -120,6 +123,7 @@ int WebPPictureView(const WebPPicture* src, return 1; } +#if !defined(WEBP_REDUCE_SIZE) //------------------------------------------------------------------------------ // Picture cropping @@ -198,34 +202,34 @@ static void AlphaMultiplyY(WebPPicture* const pic, int inverse) { } } -int WebPPictureRescale(WebPPicture* pic, int width, int height) { +int WebPPictureRescale(WebPPicture* picture, int width, int height) { WebPPicture tmp; int prev_width, prev_height; rescaler_t* work; - if (pic == NULL) return 0; - prev_width = pic->width; - prev_height = pic->height; + if (picture == NULL) return 0; + prev_width = picture->width; + prev_height = picture->height; if (!WebPRescalerGetScaledDimensions( prev_width, prev_height, &width, &height)) { return 0; } - PictureGrabSpecs(pic, &tmp); + PictureGrabSpecs(picture, &tmp); tmp.width = width; tmp.height = height; if (!WebPPictureAlloc(&tmp)) return 0; - if (!pic->use_argb) { + if (!picture->use_argb) { work = (rescaler_t*)WebPSafeMalloc(2ULL * width, sizeof(*work)); if (work == NULL) { WebPPictureFree(&tmp); return 0; } // If present, we need to rescale alpha first (for AlphaMultiplyY). - if (pic->a != NULL) { + if (picture->a != NULL) { WebPInitAlphaProcessing(); - if (!RescalePlane(pic->a, prev_width, prev_height, pic->a_stride, + if (!RescalePlane(picture->a, prev_width, prev_height, picture->a_stride, tmp.a, width, height, tmp.a_stride, work, 1)) { return 0; } @@ -233,17 +237,15 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) { // We take transparency into account on the luma plane only. That's not // totally exact blending, but still is a good approximation. - AlphaMultiplyY(pic, 0); - if (!RescalePlane(pic->y, prev_width, prev_height, pic->y_stride, + AlphaMultiplyY(picture, 0); + if (!RescalePlane(picture->y, prev_width, prev_height, picture->y_stride, tmp.y, width, height, tmp.y_stride, work, 1) || - !RescalePlane(pic->u, - HALVE(prev_width), HALVE(prev_height), pic->uv_stride, - tmp.u, - HALVE(width), HALVE(height), tmp.uv_stride, work, 1) || - !RescalePlane(pic->v, - HALVE(prev_width), HALVE(prev_height), pic->uv_stride, - tmp.v, - HALVE(width), HALVE(height), tmp.uv_stride, work, 1)) { + !RescalePlane(picture->u, HALVE(prev_width), HALVE(prev_height), + picture->uv_stride, tmp.u, HALVE(width), HALVE(height), + tmp.uv_stride, work, 1) || + !RescalePlane(picture->v, HALVE(prev_width), HALVE(prev_height), + picture->uv_stride, tmp.v, HALVE(width), HALVE(height), + tmp.uv_stride, work, 1)) { return 0; } AlphaMultiplyY(&tmp, 1); @@ -257,18 +259,17 @@ int WebPPictureRescale(WebPPicture* pic, int width, int height) { // weighting first (black-matting), scale the RGB values, and remove // the premultiplication afterward (while preserving the alpha channel). WebPInitAlphaProcessing(); - AlphaMultiplyARGB(pic, 0); - if (!RescalePlane((const uint8_t*)pic->argb, prev_width, prev_height, - pic->argb_stride * 4, - (uint8_t*)tmp.argb, width, height, - tmp.argb_stride * 4, work, 4)) { + AlphaMultiplyARGB(picture, 0); + if (!RescalePlane((const uint8_t*)picture->argb, prev_width, prev_height, + picture->argb_stride * 4, (uint8_t*)tmp.argb, width, + height, tmp.argb_stride * 4, work, 4)) { return 0; } AlphaMultiplyARGB(&tmp, 1); } - WebPPictureFree(pic); + WebPPictureFree(picture); WebPSafeFree(work); - *pic = tmp; + *picture = tmp; return 1; } @@ -280,23 +281,6 @@ int WebPPictureCopy(const WebPPicture* src, WebPPicture* dst) { return 0; } -int WebPPictureIsView(const WebPPicture* picture) { - (void)picture; - return 0; -} - -int WebPPictureView(const WebPPicture* src, - int left, int top, int width, int height, - WebPPicture* dst) { - (void)src; - (void)left; - (void)top; - (void)width; - (void)height; - (void)dst; - return 0; -} - int WebPPictureCrop(WebPPicture* pic, int left, int top, int width, int height) { (void)pic; diff --git a/thirdparty/libwebp/src/enc/picture_tools_enc.c b/thirdparty/libwebp/src/enc/picture_tools_enc.c index 38cb01534a3..147cc18608c 100644 --- a/thirdparty/libwebp/src/enc/picture_tools_enc.c +++ b/thirdparty/libwebp/src/enc/picture_tools_enc.c @@ -190,27 +190,28 @@ static WEBP_INLINE uint32_t MakeARGB32(int r, int g, int b) { return (0xff000000u | (r << 16) | (g << 8) | b); } -void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) { +void WebPBlendAlpha(WebPPicture* picture, uint32_t background_rgb) { const int red = (background_rgb >> 16) & 0xff; const int green = (background_rgb >> 8) & 0xff; const int blue = (background_rgb >> 0) & 0xff; int x, y; - if (pic == NULL) return; - if (!pic->use_argb) { - const int uv_width = (pic->width >> 1); // omit last pixel during u/v loop + if (picture == NULL) return; + if (!picture->use_argb) { + // omit last pixel during u/v loop + const int uv_width = (picture->width >> 1); const int Y0 = VP8RGBToY(red, green, blue, YUV_HALF); // VP8RGBToU/V expects the u/v values summed over four pixels const int U0 = VP8RGBToU(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF); const int V0 = VP8RGBToV(4 * red, 4 * green, 4 * blue, 4 * YUV_HALF); - const int has_alpha = pic->colorspace & WEBP_CSP_ALPHA_BIT; - uint8_t* y_ptr = pic->y; - uint8_t* u_ptr = pic->u; - uint8_t* v_ptr = pic->v; - uint8_t* a_ptr = pic->a; + const int has_alpha = picture->colorspace & WEBP_CSP_ALPHA_BIT; + uint8_t* y_ptr = picture->y; + uint8_t* u_ptr = picture->u; + uint8_t* v_ptr = picture->v; + uint8_t* a_ptr = picture->a; if (!has_alpha || a_ptr == NULL) return; // nothing to do - for (y = 0; y < pic->height; ++y) { + for (y = 0; y < picture->height; ++y) { // Luma blending - for (x = 0; x < pic->width; ++x) { + for (x = 0; x < picture->width; ++x) { const uint8_t alpha = a_ptr[x]; if (alpha < 0xff) { y_ptr[x] = BLEND(Y0, y_ptr[x], alpha); @@ -219,7 +220,7 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) { // Chroma blending every even line if ((y & 1) == 0) { uint8_t* const a_ptr2 = - (y + 1 == pic->height) ? a_ptr : a_ptr + pic->a_stride; + (y + 1 == picture->height) ? a_ptr : a_ptr + picture->a_stride; for (x = 0; x < uv_width; ++x) { // Average four alpha values into a single blending weight. // TODO(skal): might lead to visible contouring. Can we do better? @@ -229,24 +230,24 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) { u_ptr[x] = BLEND_10BIT(U0, u_ptr[x], alpha); v_ptr[x] = BLEND_10BIT(V0, v_ptr[x], alpha); } - if (pic->width & 1) { // rightmost pixel + if (picture->width & 1) { // rightmost pixel const uint32_t alpha = 2 * (a_ptr[2 * x + 0] + a_ptr2[2 * x + 0]); u_ptr[x] = BLEND_10BIT(U0, u_ptr[x], alpha); v_ptr[x] = BLEND_10BIT(V0, v_ptr[x], alpha); } } else { - u_ptr += pic->uv_stride; - v_ptr += pic->uv_stride; + u_ptr += picture->uv_stride; + v_ptr += picture->uv_stride; } - memset(a_ptr, 0xff, pic->width); // reset alpha value to opaque - a_ptr += pic->a_stride; - y_ptr += pic->y_stride; + memset(a_ptr, 0xff, picture->width); // reset alpha value to opaque + a_ptr += picture->a_stride; + y_ptr += picture->y_stride; } } else { - uint32_t* argb = pic->argb; + uint32_t* argb = picture->argb; const uint32_t background = MakeARGB32(red, green, blue); - for (y = 0; y < pic->height; ++y) { - for (x = 0; x < pic->width; ++x) { + for (y = 0; y < picture->height; ++y) { + for (x = 0; x < picture->width; ++x) { const int alpha = (argb[x] >> 24) & 0xff; if (alpha != 0xff) { if (alpha > 0) { @@ -262,7 +263,7 @@ void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb) { } } } - argb += pic->argb_stride; + argb += picture->argb_stride; } } } diff --git a/thirdparty/libwebp/src/enc/predictor_enc.c b/thirdparty/libwebp/src/enc/predictor_enc.c index 2b5c7672807..b3d44b59d50 100644 --- a/thirdparty/libwebp/src/enc/predictor_enc.c +++ b/thirdparty/libwebp/src/enc/predictor_enc.c @@ -16,6 +16,7 @@ #include "src/dsp/lossless.h" #include "src/dsp/lossless_common.h" +#include "src/enc/vp8i_enc.h" #include "src/enc/vp8li_enc.h" #define MAX_DIFF_COST (1e30f) @@ -31,10 +32,10 @@ static WEBP_INLINE int GetMin(int a, int b) { return (a > b) ? b : a; } // Methods to calculate Entropy (Shannon). static float PredictionCostSpatial(const int counts[256], int weight_0, - double exp_val) { + float exp_val) { const int significant_symbols = 256 >> 4; - const double exp_decay_factor = 0.6; - double bits = weight_0 * counts[0]; + const float exp_decay_factor = 0.6f; + float bits = (float)weight_0 * counts[0]; int i; for (i = 1; i < significant_symbols; ++i) { bits += exp_val * (counts[i] + counts[256 - i]); @@ -46,9 +47,9 @@ static float PredictionCostSpatial(const int counts[256], int weight_0, static float PredictionCostSpatialHistogram(const int accumulated[4][256], const int tile[4][256]) { int i; - double retval = 0; + float retval = 0.f; for (i = 0; i < 4; ++i) { - const double kExpValue = 0.94; + const float kExpValue = 0.94f; retval += PredictionCostSpatial(tile[i], 1, kExpValue); retval += VP8LCombinedShannonEntropy(tile[i], accumulated[i]); } @@ -472,12 +473,15 @@ static void CopyImageWithPrediction(int width, int height, // with respect to predictions. If near_lossless_quality < 100, applies // near lossless processing, shaving off more bits of residuals for lower // qualities. -void VP8LResidualImage(int width, int height, int bits, int low_effort, - uint32_t* const argb, uint32_t* const argb_scratch, - uint32_t* const image, int near_lossless_quality, - int exact, int used_subtract_green) { +int VP8LResidualImage(int width, int height, int bits, int low_effort, + uint32_t* const argb, uint32_t* const argb_scratch, + uint32_t* const image, int near_lossless_quality, + int exact, int used_subtract_green, + const WebPPicture* const pic, int percent_range, + int* const percent) { const int tiles_per_row = VP8LSubSampleSize(width, bits); const int tiles_per_col = VP8LSubSampleSize(height, bits); + int percent_start = *percent; int tile_y; int histo[4][256]; const int max_quantization = 1 << VP8LNearLosslessBits(near_lossless_quality); @@ -491,17 +495,24 @@ void VP8LResidualImage(int width, int height, int bits, int low_effort, for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) { int tile_x; for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) { - const int pred = GetBestPredictorForTile(width, height, tile_x, tile_y, - bits, histo, argb_scratch, argb, max_quantization, exact, - used_subtract_green, image); + const int pred = GetBestPredictorForTile( + width, height, tile_x, tile_y, bits, histo, argb_scratch, argb, + max_quantization, exact, used_subtract_green, image); image[tile_y * tiles_per_row + tile_x] = ARGB_BLACK | (pred << 8); } + + if (!WebPReportProgress( + pic, percent_start + percent_range * tile_y / tiles_per_col, + percent)) { + return 0; + } } } CopyImageWithPrediction(width, height, bits, image, argb_scratch, argb, low_effort, max_quantization, exact, used_subtract_green); + return WebPReportProgress(pic, percent_start + percent_range, percent); } //------------------------------------------------------------------------------ @@ -532,7 +543,7 @@ static float PredictionCostCrossColor(const int accumulated[256], const int counts[256]) { // Favor low entropy, locally and globally. // Favor small absolute values for PredictionCostSpatial - static const double kExpValue = 2.4; + static const float kExpValue = 2.4f; return VP8LCombinedShannonEntropy(counts, accumulated) + PredictionCostSpatial(counts, 3, kExpValue); } @@ -714,11 +725,14 @@ static void CopyTileWithColorTransform(int xsize, int ysize, } } -void VP8LColorSpaceTransform(int width, int height, int bits, int quality, - uint32_t* const argb, uint32_t* image) { +int VP8LColorSpaceTransform(int width, int height, int bits, int quality, + uint32_t* const argb, uint32_t* image, + const WebPPicture* const pic, int percent_range, + int* const percent) { const int max_tile_size = 1 << bits; const int tile_xsize = VP8LSubSampleSize(width, bits); const int tile_ysize = VP8LSubSampleSize(height, bits); + int percent_start = *percent; int accumulated_red_histo[256] = { 0 }; int accumulated_blue_histo[256] = { 0 }; int tile_x, tile_y; @@ -768,5 +782,11 @@ void VP8LColorSpaceTransform(int width, int height, int bits, int quality, } } } + if (!WebPReportProgress( + pic, percent_start + percent_range * tile_y / tile_ysize, + percent)) { + return 0; + } } + return 1; } diff --git a/thirdparty/libwebp/src/enc/quant_enc.c b/thirdparty/libwebp/src/enc/quant_enc.c index 6cede28ab41..6d8202d2771 100644 --- a/thirdparty/libwebp/src/enc/quant_enc.c +++ b/thirdparty/libwebp/src/enc/quant_enc.c @@ -533,7 +533,8 @@ static void InitScore(VP8ModeScore* const rd) { rd->score = MAX_COST; } -static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) { +static void CopyScore(VP8ModeScore* WEBP_RESTRICT const dst, + const VP8ModeScore* WEBP_RESTRICT const src) { dst->D = src->D; dst->SD = src->SD; dst->R = src->R; @@ -542,7 +543,8 @@ static void CopyScore(VP8ModeScore* const dst, const VP8ModeScore* const src) { dst->score = src->score; } -static void AddScore(VP8ModeScore* const dst, const VP8ModeScore* const src) { +static void AddScore(VP8ModeScore* WEBP_RESTRICT const dst, + const VP8ModeScore* WEBP_RESTRICT const src) { dst->D += src->D; dst->SD += src->SD; dst->R += src->R; @@ -588,10 +590,10 @@ static WEBP_INLINE score_t RDScoreTrellis(int lambda, score_t rate, // Coefficient type. enum { TYPE_I16_AC = 0, TYPE_I16_DC = 1, TYPE_CHROMA_A = 2, TYPE_I4_AC = 3 }; -static int TrellisQuantizeBlock(const VP8Encoder* const enc, +static int TrellisQuantizeBlock(const VP8Encoder* WEBP_RESTRICT const enc, int16_t in[16], int16_t out[16], int ctx0, int coeff_type, - const VP8Matrix* const mtx, + const VP8Matrix* WEBP_RESTRICT const mtx, int lambda) { const ProbaArray* const probas = enc->proba_.coeffs_[coeff_type]; CostArrayPtr const costs = @@ -767,9 +769,9 @@ static int TrellisQuantizeBlock(const VP8Encoder* const enc, // all at once. Output is the reconstructed block in *yuv_out, and the // quantized levels in *levels. -static int ReconstructIntra16(VP8EncIterator* const it, - VP8ModeScore* const rd, - uint8_t* const yuv_out, +static int ReconstructIntra16(VP8EncIterator* WEBP_RESTRICT const it, + VP8ModeScore* WEBP_RESTRICT const rd, + uint8_t* WEBP_RESTRICT const yuv_out, int mode) { const VP8Encoder* const enc = it->enc_; const uint8_t* const ref = it->yuv_p_ + VP8I16ModeOffsets[mode]; @@ -819,10 +821,10 @@ static int ReconstructIntra16(VP8EncIterator* const it, return nz; } -static int ReconstructIntra4(VP8EncIterator* const it, +static int ReconstructIntra4(VP8EncIterator* WEBP_RESTRICT const it, int16_t levels[16], - const uint8_t* const src, - uint8_t* const yuv_out, + const uint8_t* WEBP_RESTRICT const src, + uint8_t* WEBP_RESTRICT const yuv_out, int mode) { const VP8Encoder* const enc = it->enc_; const uint8_t* const ref = it->yuv_p_ + VP8I4ModeOffsets[mode]; @@ -855,7 +857,8 @@ static int ReconstructIntra4(VP8EncIterator* const it, // Quantize as usual, but also compute and return the quantization error. // Error is already divided by DSHIFT. -static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) { +static int QuantizeSingle(int16_t* WEBP_RESTRICT const v, + const VP8Matrix* WEBP_RESTRICT const mtx) { int V = *v; const int sign = (V < 0); if (sign) V = -V; @@ -869,9 +872,10 @@ static int QuantizeSingle(int16_t* const v, const VP8Matrix* const mtx) { return (sign ? -V : V) >> DSCALE; } -static void CorrectDCValues(const VP8EncIterator* const it, - const VP8Matrix* const mtx, - int16_t tmp[][16], VP8ModeScore* const rd) { +static void CorrectDCValues(const VP8EncIterator* WEBP_RESTRICT const it, + const VP8Matrix* WEBP_RESTRICT const mtx, + int16_t tmp[][16], + VP8ModeScore* WEBP_RESTRICT const rd) { // | top[0] | top[1] // --------+--------+--------- // left[0] | tmp[0] tmp[1] <-> err0 err1 @@ -902,8 +906,8 @@ static void CorrectDCValues(const VP8EncIterator* const it, } } -static void StoreDiffusionErrors(VP8EncIterator* const it, - const VP8ModeScore* const rd) { +static void StoreDiffusionErrors(VP8EncIterator* WEBP_RESTRICT const it, + const VP8ModeScore* WEBP_RESTRICT const rd) { int ch; for (ch = 0; ch <= 1; ++ch) { int8_t* const top = it->top_derr_[it->x_][ch]; @@ -922,8 +926,9 @@ static void StoreDiffusionErrors(VP8EncIterator* const it, //------------------------------------------------------------------------------ -static int ReconstructUV(VP8EncIterator* const it, VP8ModeScore* const rd, - uint8_t* const yuv_out, int mode) { +static int ReconstructUV(VP8EncIterator* WEBP_RESTRICT const it, + VP8ModeScore* WEBP_RESTRICT const rd, + uint8_t* WEBP_RESTRICT const yuv_out, int mode) { const VP8Encoder* const enc = it->enc_; const uint8_t* const ref = it->yuv_p_ + VP8UVModeOffsets[mode]; const uint8_t* const src = it->yuv_in_ + U_OFF_ENC; @@ -994,7 +999,8 @@ static void SwapOut(VP8EncIterator* const it) { SwapPtr(&it->yuv_out_, &it->yuv_out2_); } -static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) { +static void PickBestIntra16(VP8EncIterator* WEBP_RESTRICT const it, + VP8ModeScore* WEBP_RESTRICT rd) { const int kNumBlocks = 16; VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; const int lambda = dqm->lambda_i16_; @@ -1054,7 +1060,7 @@ static void PickBestIntra16(VP8EncIterator* const it, VP8ModeScore* rd) { //------------------------------------------------------------------------------ // return the cost array corresponding to the surrounding prediction modes. -static const uint16_t* GetCostModeI4(VP8EncIterator* const it, +static const uint16_t* GetCostModeI4(VP8EncIterator* WEBP_RESTRICT const it, const uint8_t modes[16]) { const int preds_w = it->enc_->preds_w_; const int x = (it->i4_ & 3), y = it->i4_ >> 2; @@ -1063,7 +1069,8 @@ static const uint16_t* GetCostModeI4(VP8EncIterator* const it, return VP8FixedCostsI4[top][left]; } -static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { +static int PickBestIntra4(VP8EncIterator* WEBP_RESTRICT const it, + VP8ModeScore* WEBP_RESTRICT const rd) { const VP8Encoder* const enc = it->enc_; const VP8SegmentInfo* const dqm = &enc->dqm_[it->mb_->segment_]; const int lambda = dqm->lambda_i4_; @@ -1159,7 +1166,8 @@ static int PickBestIntra4(VP8EncIterator* const it, VP8ModeScore* const rd) { //------------------------------------------------------------------------------ -static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { +static void PickBestUV(VP8EncIterator* WEBP_RESTRICT const it, + VP8ModeScore* WEBP_RESTRICT const rd) { const int kNumBlocks = 8; const VP8SegmentInfo* const dqm = &it->enc_->dqm_[it->mb_->segment_]; const int lambda = dqm->lambda_uv_; @@ -1211,7 +1219,8 @@ static void PickBestUV(VP8EncIterator* const it, VP8ModeScore* const rd) { //------------------------------------------------------------------------------ // Final reconstruction and quantization. -static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { +static void SimpleQuantize(VP8EncIterator* WEBP_RESTRICT const it, + VP8ModeScore* WEBP_RESTRICT const rd) { const VP8Encoder* const enc = it->enc_; const int is_i16 = (it->mb_->type_ == 1); int nz = 0; @@ -1236,9 +1245,9 @@ static void SimpleQuantize(VP8EncIterator* const it, VP8ModeScore* const rd) { } // Refine intra16/intra4 sub-modes based on distortion only (not rate). -static void RefineUsingDistortion(VP8EncIterator* const it, +static void RefineUsingDistortion(VP8EncIterator* WEBP_RESTRICT const it, int try_both_modes, int refine_uv_mode, - VP8ModeScore* const rd) { + VP8ModeScore* WEBP_RESTRICT const rd) { score_t best_score = MAX_COST; int nz = 0; int mode; @@ -1352,7 +1361,8 @@ static void RefineUsingDistortion(VP8EncIterator* const it, //------------------------------------------------------------------------------ // Entry point -int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, +int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it, + VP8ModeScore* WEBP_RESTRICT const rd, VP8RDLevel rd_opt) { int is_skipped; const int method = it->enc_->method_; diff --git a/thirdparty/libwebp/src/enc/vp8i_enc.h b/thirdparty/libwebp/src/enc/vp8i_enc.h index b4bba08f27e..71f76702ae2 100644 --- a/thirdparty/libwebp/src/enc/vp8i_enc.h +++ b/thirdparty/libwebp/src/enc/vp8i_enc.h @@ -32,7 +32,7 @@ extern "C" { // version numbers #define ENC_MAJ_VERSION 1 #define ENC_MIN_VERSION 2 -#define ENC_REV_VERSION 2 +#define ENC_REV_VERSION 4 enum { MAX_LF_LEVELS = 64, // Maximum loop filter level MAX_VARIABLE_LEVEL = 67, // last (inclusive) level with variable cost @@ -470,7 +470,8 @@ int VP8EncAnalyze(VP8Encoder* const enc); // Sets up segment's quantization values, base_quant_ and filter strengths. void VP8SetSegmentParams(VP8Encoder* const enc, float quality); // Pick best modes and fills the levels. Returns true if skipped. -int VP8Decimate(VP8EncIterator* const it, VP8ModeScore* const rd, +int VP8Decimate(VP8EncIterator* WEBP_RESTRICT const it, + VP8ModeScore* WEBP_RESTRICT const rd, VP8RDLevel rd_opt); // in alpha.c @@ -490,19 +491,24 @@ int VP8FilterStrengthFromDelta(int sharpness, int delta); // misc utils for picture_*.c: +// Returns true if 'picture' is non-NULL and dimensions/colorspace are within +// their valid ranges. If returning false, the 'error_code' in 'picture' is +// updated. +int WebPValidatePicture(const WebPPicture* const picture); + // Remove reference to the ARGB/YUVA buffer (doesn't free anything). void WebPPictureResetBuffers(WebPPicture* const picture); -// Allocates ARGB buffer of given dimension (previous one is always free'd). -// Preserves the YUV(A) buffer. Returns false in case of error (invalid param, -// out-of-memory). -int WebPPictureAllocARGB(WebPPicture* const picture, int width, int height); +// Allocates ARGB buffer according to set width/height (previous one is +// always free'd). Preserves the YUV(A) buffer. Returns false in case of error +// (invalid param, out-of-memory). +int WebPPictureAllocARGB(WebPPicture* const picture); -// Allocates YUVA buffer of given dimension (previous one is always free'd). -// Uses picture->csp to determine whether an alpha buffer is needed. +// Allocates YUVA buffer according to set width/height (previous one is always +// free'd). Uses picture->csp to determine whether an alpha buffer is needed. // Preserves the ARGB buffer. // Returns false in case of error (invalid param, out-of-memory). -int WebPPictureAllocYUVA(WebPPicture* const picture, int width, int height); +int WebPPictureAllocYUVA(WebPPicture* const picture); // Replace samples that are fully transparent by 'color' to help compressibility // (no guarantee, though). Assumes pic->use_argb is true. diff --git a/thirdparty/libwebp/src/enc/vp8l_enc.c b/thirdparty/libwebp/src/enc/vp8l_enc.c index e330e716f1d..2b345df6105 100644 --- a/thirdparty/libwebp/src/enc/vp8l_enc.c +++ b/thirdparty/libwebp/src/enc/vp8l_enc.c @@ -15,15 +15,16 @@ #include #include +#include "src/dsp/lossless.h" +#include "src/dsp/lossless_common.h" #include "src/enc/backward_references_enc.h" #include "src/enc/histogram_enc.h" #include "src/enc/vp8i_enc.h" #include "src/enc/vp8li_enc.h" -#include "src/dsp/lossless.h" -#include "src/dsp/lossless_common.h" #include "src/utils/bit_writer_utils.h" #include "src/utils/huffman_encode_utils.h" #include "src/utils/utils.h" +#include "src/webp/encode.h" #include "src/webp/format_constants.h" // Maximum number of histogram images (sub-blocks). @@ -183,10 +184,9 @@ static void CoOccurrenceFindMax(const uint32_t* const cooccurrence, } // Builds the cooccurrence matrix -static WebPEncodingError CoOccurrenceBuild(const WebPPicture* const pic, - const uint32_t* const palette, - uint32_t num_colors, - uint32_t* cooccurrence) { +static int CoOccurrenceBuild(const WebPPicture* const pic, + const uint32_t* const palette, uint32_t num_colors, + uint32_t* cooccurrence) { uint32_t *lines, *line_top, *line_current, *line_tmp; int x, y; const uint32_t* src = pic->argb; @@ -195,7 +195,10 @@ static WebPEncodingError CoOccurrenceBuild(const WebPPicture* const pic, uint32_t idx_map[MAX_PALETTE_SIZE] = {0}; uint32_t palette_sorted[MAX_PALETTE_SIZE]; lines = (uint32_t*)WebPSafeMalloc(2 * pic->width, sizeof(*lines)); - if (lines == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; + if (lines == NULL) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + return 0; + } line_top = &lines[0]; line_current = &lines[pic->width]; PrepareMapToPalette(palette, num_colors, palette_sorted, idx_map); @@ -226,7 +229,7 @@ static WebPEncodingError CoOccurrenceBuild(const WebPPicture* const pic, src += pic->argb_stride; } WebPSafeFree(lines); - return VP8_ENC_OK; + return 1; } struct Sum { @@ -237,7 +240,7 @@ struct Sum { // Implements the modified Zeng method from "A Survey on Palette Reordering // Methods for Improving the Compression of Color-Indexed Images" by Armando J. // Pinho and Antonio J. R. Neves. -static WebPEncodingError PaletteSortModifiedZeng( +static int PaletteSortModifiedZeng( const WebPPicture* const pic, const uint32_t* const palette_sorted, uint32_t num_colors, uint32_t* const palette) { uint32_t i, j, ind; @@ -247,15 +250,16 @@ static WebPEncodingError PaletteSortModifiedZeng( uint32_t first, last; uint32_t num_sums; // TODO(vrabaud) check whether one color images should use palette or not. - if (num_colors <= 1) return VP8_ENC_OK; + if (num_colors <= 1) return 1; // Build the co-occurrence matrix. cooccurrence = (uint32_t*)WebPSafeCalloc(num_colors * num_colors, sizeof(*cooccurrence)); - if (cooccurrence == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; - if (CoOccurrenceBuild(pic, palette_sorted, num_colors, cooccurrence) != - VP8_ENC_OK) { - WebPSafeFree(cooccurrence); - return VP8_ENC_ERROR_OUT_OF_MEMORY; + if (cooccurrence == NULL) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + return 0; + } + if (!CoOccurrenceBuild(pic, palette_sorted, num_colors, cooccurrence)) { + return 0; } // Initialize the mapping list with the two best indices. @@ -316,7 +320,7 @@ static WebPEncodingError PaletteSortModifiedZeng( for (i = 0; i < num_colors; ++i) { palette[i] = palette_sorted[remapping[(first + i) % num_colors]]; } - return VP8_ENC_OK; + return 1; } // ----------------------------------------------------------------------------- @@ -434,8 +438,8 @@ static int AnalyzeEntropy(const uint32_t* argb, curr_row += argb_stride; } { - double entropy_comp[kHistoTotal]; - double entropy[kNumEntropyIx]; + float entropy_comp[kHistoTotal]; + float entropy[kNumEntropyIx]; int k; int last_mode_to_analyze = use_palette ? kPalette : kSpatialSubGreen; int j; @@ -949,11 +953,11 @@ static WEBP_INLINE void WriteHuffmanCodeWithExtraBits( VP8LPutBits(bw, (bits << depth) | symbol, depth + n_bits); } -static WebPEncodingError StoreImageToBitMask( +static int StoreImageToBitMask( VP8LBitWriter* const bw, int width, int histo_bits, const VP8LBackwardRefs* const refs, const uint16_t* histogram_symbols, - const HuffmanTreeCode* const huffman_codes) { + const HuffmanTreeCode* const huffman_codes, const WebPPicture* const pic) { const int histo_xsize = histo_bits ? VP8LSubSampleSize(width, histo_bits) : 1; const int tile_mask = (histo_bits == 0) ? 0 : -(1 << histo_bits); // x and y trace the position in the image. @@ -1006,44 +1010,53 @@ static WebPEncodingError StoreImageToBitMask( } VP8LRefsCursorNext(&c); } - return bw->error_ ? VP8_ENC_ERROR_OUT_OF_MEMORY : VP8_ENC_OK; + if (bw->error_) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + return 0; + } + return 1; } -// Special case of EncodeImageInternal() for cache-bits=0, histo_bits=31 -static WebPEncodingError EncodeImageNoHuffman( - VP8LBitWriter* const bw, const uint32_t* const argb, - VP8LHashChain* const hash_chain, VP8LBackwardRefs* const refs_array, - int width, int height, int quality, int low_effort) { +// Special case of EncodeImageInternal() for cache-bits=0, histo_bits=31. +// pic and percent are for progress. +static int EncodeImageNoHuffman(VP8LBitWriter* const bw, + const uint32_t* const argb, + VP8LHashChain* const hash_chain, + VP8LBackwardRefs* const refs_array, int width, + int height, int quality, int low_effort, + const WebPPicture* const pic, int percent_range, + int* const percent) { int i; int max_tokens = 0; - WebPEncodingError err = VP8_ENC_OK; VP8LBackwardRefs* refs; HuffmanTreeToken* tokens = NULL; - HuffmanTreeCode huffman_codes[5] = { { 0, NULL, NULL } }; - const uint16_t histogram_symbols[1] = { 0 }; // only one tree, one symbol + HuffmanTreeCode huffman_codes[5] = {{0, NULL, NULL}}; + const uint16_t histogram_symbols[1] = {0}; // only one tree, one symbol int cache_bits = 0; VP8LHistogramSet* histogram_image = NULL; HuffmanTree* const huff_tree = (HuffmanTree*)WebPSafeMalloc( - 3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree)); + 3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree)); if (huff_tree == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } // Calculate backward references from ARGB image. - if (!VP8LHashChainFill(hash_chain, quality, argb, width, height, - low_effort)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + if (!VP8LHashChainFill(hash_chain, quality, argb, width, height, low_effort, + pic, percent_range / 2, percent)) { + goto Error; + } + if (!VP8LGetBackwardReferences(width, height, argb, quality, /*low_effort=*/0, + kLZ77Standard | kLZ77RLE, cache_bits, + /*do_no_cache=*/0, hash_chain, refs_array, + &cache_bits, pic, + percent_range - percent_range / 2, percent)) { goto Error; } - err = VP8LGetBackwardReferences( - width, height, argb, quality, /*low_effort=*/0, kLZ77Standard | kLZ77RLE, - cache_bits, /*do_no_cache=*/0, hash_chain, refs_array, &cache_bits); - if (err != VP8_ENC_OK) goto Error; refs = &refs_array[0]; histogram_image = VP8LAllocateHistogramSet(1, cache_bits); if (histogram_image == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } VP8LHistogramSetClear(histogram_image); @@ -1054,7 +1067,7 @@ static WebPEncodingError EncodeImageNoHuffman( // Create Huffman bit lengths and codes for each histogram image. assert(histogram_image->size == 1); if (!GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } @@ -1071,7 +1084,7 @@ static WebPEncodingError EncodeImageNoHuffman( tokens = (HuffmanTreeToken*)WebPSafeMalloc(max_tokens, sizeof(*tokens)); if (tokens == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } @@ -1083,27 +1096,32 @@ static WebPEncodingError EncodeImageNoHuffman( } // Store actual literals. - err = StoreImageToBitMask(bw, width, 0, refs, histogram_symbols, - huffman_codes); + if (!StoreImageToBitMask(bw, width, 0, refs, histogram_symbols, huffman_codes, + pic)) { + goto Error; + } Error: WebPSafeFree(tokens); WebPSafeFree(huff_tree); VP8LFreeHistogramSet(histogram_image); WebPSafeFree(huffman_codes[0].codes); - return err; + return (pic->error_code == VP8_ENC_OK); } -static WebPEncodingError EncodeImageInternal( +// pic and percent are for progress. +static int EncodeImageInternal( VP8LBitWriter* const bw, const uint32_t* const argb, VP8LHashChain* const hash_chain, VP8LBackwardRefs refs_array[4], int width, int height, int quality, int low_effort, int use_cache, const CrunchConfig* const config, int* cache_bits, int histogram_bits, - size_t init_byte_position, int* const hdr_size, int* const data_size) { - WebPEncodingError err = VP8_ENC_ERROR_OUT_OF_MEMORY; + size_t init_byte_position, int* const hdr_size, int* const data_size, + const WebPPicture* const pic, int percent_range, int* const percent) { const uint32_t histogram_image_xysize = VP8LSubSampleSize(width, histogram_bits) * VP8LSubSampleSize(height, histogram_bits); + int remaining_percent = percent_range; + int percent_start = *percent; VP8LHistogramSet* histogram_image = NULL; VP8LHistogram* tmp_histo = NULL; int histogram_image_size = 0; @@ -1112,9 +1130,8 @@ static WebPEncodingError EncodeImageInternal( 3ULL * CODE_LENGTH_CODES, sizeof(*huff_tree)); HuffmanTreeToken* tokens = NULL; HuffmanTreeCode* huffman_codes = NULL; - uint16_t* const histogram_symbols = - (uint16_t*)WebPSafeMalloc(histogram_image_xysize, - sizeof(*histogram_symbols)); + uint16_t* const histogram_symbols = (uint16_t*)WebPSafeMalloc( + histogram_image_xysize, sizeof(*histogram_symbols)); int sub_configs_idx; int cache_bits_init, write_histogram_image; VP8LBitWriter bw_init = *bw, bw_best; @@ -1126,14 +1143,27 @@ static WebPEncodingError EncodeImageInternal( assert(hdr_size != NULL); assert(data_size != NULL); - // Make sure we can allocate the different objects. memset(&hash_chain_histogram, 0, sizeof(hash_chain_histogram)); - if (huff_tree == NULL || histogram_symbols == NULL || - !VP8LHashChainInit(&hash_chain_histogram, histogram_image_xysize) || - !VP8LHashChainFill(hash_chain, quality, argb, width, height, - low_effort)) { + if (!VP8LBitWriterInit(&bw_best, 0)) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } + + // Make sure we can allocate the different objects. + if (huff_tree == NULL || histogram_symbols == NULL || + !VP8LHashChainInit(&hash_chain_histogram, histogram_image_xysize)) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + goto Error; + } + + percent_range = remaining_percent / 5; + if (!VP8LHashChainFill(hash_chain, quality, argb, width, height, + low_effort, pic, percent_range, percent)) { + goto Error; + } + percent_start += percent_range; + remaining_percent -= percent_range; + if (use_cache) { // If the value is different from zero, it has been set during the // palette analysis. @@ -1142,22 +1172,27 @@ static WebPEncodingError EncodeImageInternal( cache_bits_init = 0; } // If several iterations will happen, clone into bw_best. - if (!VP8LBitWriterInit(&bw_best, 0) || - ((config->sub_configs_size_ > 1 || - config->sub_configs_[0].do_no_cache_) && - !VP8LBitWriterClone(bw, &bw_best))) { + if ((config->sub_configs_size_ > 1 || config->sub_configs_[0].do_no_cache_) && + !VP8LBitWriterClone(bw, &bw_best)) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } + for (sub_configs_idx = 0; sub_configs_idx < config->sub_configs_size_; ++sub_configs_idx) { const CrunchSubConfig* const sub_config = &config->sub_configs_[sub_configs_idx]; int cache_bits_best, i_cache; - err = VP8LGetBackwardReferences(width, height, argb, quality, low_effort, - sub_config->lz77_, cache_bits_init, - sub_config->do_no_cache_, hash_chain, - &refs_array[0], &cache_bits_best); - if (err != VP8_ENC_OK) goto Error; + int i_remaining_percent = remaining_percent / config->sub_configs_size_; + int i_percent_range = i_remaining_percent / 4; + i_remaining_percent -= i_percent_range; + + if (!VP8LGetBackwardReferences( + width, height, argb, quality, low_effort, sub_config->lz77_, + cache_bits_init, sub_config->do_no_cache_, hash_chain, + &refs_array[0], &cache_bits_best, pic, i_percent_range, percent)) { + goto Error; + } for (i_cache = 0; i_cache < (sub_config->do_no_cache_ ? 2 : 1); ++i_cache) { const int cache_bits_tmp = (i_cache == 0) ? cache_bits_best : 0; @@ -1172,11 +1207,17 @@ static WebPEncodingError EncodeImageInternal( histogram_image = VP8LAllocateHistogramSet(histogram_image_xysize, cache_bits_tmp); tmp_histo = VP8LAllocateHistogram(cache_bits_tmp); - if (histogram_image == NULL || tmp_histo == NULL || - !VP8LGetHistoImageSymbols(width, height, &refs_array[i_cache], - quality, low_effort, histogram_bits, - cache_bits_tmp, histogram_image, tmp_histo, - histogram_symbols)) { + if (histogram_image == NULL || tmp_histo == NULL) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + goto Error; + } + + i_percent_range = i_remaining_percent / 3; + i_remaining_percent -= i_percent_range; + if (!VP8LGetHistoImageSymbols( + width, height, &refs_array[i_cache], quality, low_effort, + histogram_bits, cache_bits_tmp, histogram_image, tmp_histo, + histogram_symbols, pic, i_percent_range, percent)) { goto Error; } // Create Huffman bit lengths and codes for each histogram image. @@ -1189,6 +1230,7 @@ static WebPEncodingError EncodeImageInternal( // GetHuffBitLengthsAndCodes(). if (huffman_codes == NULL || !GetHuffBitLengthsAndCodes(histogram_image, huffman_codes)) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } // Free combined histograms. @@ -1211,12 +1253,14 @@ static WebPEncodingError EncodeImageInternal( write_histogram_image = (histogram_image_size > 1); VP8LPutBits(bw, write_histogram_image, 1); if (write_histogram_image) { - uint32_t* const histogram_argb = - (uint32_t*)WebPSafeMalloc(histogram_image_xysize, - sizeof(*histogram_argb)); + uint32_t* const histogram_argb = (uint32_t*)WebPSafeMalloc( + histogram_image_xysize, sizeof(*histogram_argb)); int max_index = 0; uint32_t i; - if (histogram_argb == NULL) goto Error; + if (histogram_argb == NULL) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + goto Error; + } for (i = 0; i < histogram_image_xysize; ++i) { const int symbol_index = histogram_symbols[i] & 0xffff; histogram_argb[i] = (symbol_index << 8); @@ -1227,12 +1271,17 @@ static WebPEncodingError EncodeImageInternal( histogram_image_size = max_index; VP8LPutBits(bw, histogram_bits - 2, 3); - err = EncodeImageNoHuffman( - bw, histogram_argb, &hash_chain_histogram, &refs_array[2], - VP8LSubSampleSize(width, histogram_bits), - VP8LSubSampleSize(height, histogram_bits), quality, low_effort); + i_percent_range = i_remaining_percent / 2; + i_remaining_percent -= i_percent_range; + if (!EncodeImageNoHuffman( + bw, histogram_argb, &hash_chain_histogram, &refs_array[2], + VP8LSubSampleSize(width, histogram_bits), + VP8LSubSampleSize(height, histogram_bits), quality, low_effort, + pic, i_percent_range, percent)) { + WebPSafeFree(histogram_argb); + goto Error; + } WebPSafeFree(histogram_argb); - if (err != VP8_ENC_OK) goto Error; } // Store Huffman codes. @@ -1256,9 +1305,10 @@ static WebPEncodingError EncodeImageInternal( } // Store actual literals. hdr_size_tmp = (int)(VP8LBitWriterNumBytes(bw) - init_byte_position); - err = StoreImageToBitMask(bw, width, histogram_bits, &refs_array[i_cache], - histogram_symbols, huffman_codes); - if (err != VP8_ENC_OK) goto Error; + if (!StoreImageToBitMask(bw, width, histogram_bits, &refs_array[i_cache], + histogram_symbols, huffman_codes, pic)) { + goto Error; + } // Keep track of the smallest image so far. if (VP8LBitWriterNumBytes(bw) < bw_size_best) { bw_size_best = VP8LBitWriterNumBytes(bw); @@ -1278,7 +1328,10 @@ static WebPEncodingError EncodeImageInternal( } } VP8LBitWriterSwap(bw, &bw_best); - err = VP8_ENC_OK; + + if (!WebPReportProgress(pic, percent_start + remaining_percent, percent)) { + goto Error; + } Error: WebPSafeFree(tokens); @@ -1292,7 +1345,7 @@ static WebPEncodingError EncodeImageInternal( } WebPSafeFree(histogram_symbols); VP8LBitWriterWipeOut(&bw_best); - return err; + return (pic->error_code == VP8_ENC_OK); } // ----------------------------------------------------------------------------- @@ -1305,22 +1358,23 @@ static void ApplySubtractGreen(VP8LEncoder* const enc, int width, int height, VP8LSubtractGreenFromBlueAndRed(enc->argb_, width * height); } -static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc, - int width, int height, - int quality, int low_effort, - int used_subtract_green, - VP8LBitWriter* const bw) { +static int ApplyPredictFilter(const VP8LEncoder* const enc, int width, + int height, int quality, int low_effort, + int used_subtract_green, VP8LBitWriter* const bw, + int percent_range, int* const percent) { const int pred_bits = enc->transform_bits_; const int transform_width = VP8LSubSampleSize(width, pred_bits); const int transform_height = VP8LSubSampleSize(height, pred_bits); // we disable near-lossless quantization if palette is used. - const int near_lossless_strength = enc->use_palette_ ? 100 - : enc->config_->near_lossless; + const int near_lossless_strength = + enc->use_palette_ ? 100 : enc->config_->near_lossless; - VP8LResidualImage(width, height, pred_bits, low_effort, enc->argb_, - enc->argb_scratch_, enc->transform_data_, - near_lossless_strength, enc->config_->exact, - used_subtract_green); + if (!VP8LResidualImage( + width, height, pred_bits, low_effort, enc->argb_, enc->argb_scratch_, + enc->transform_data_, near_lossless_strength, enc->config_->exact, + used_subtract_green, enc->pic_, percent_range / 2, percent)) { + return 0; + } VP8LPutBits(bw, TRANSFORM_PRESENT, 1); VP8LPutBits(bw, PREDICTOR_TRANSFORM, 2); assert(pred_bits >= 2); @@ -1328,19 +1382,23 @@ static WebPEncodingError ApplyPredictFilter(const VP8LEncoder* const enc, return EncodeImageNoHuffman( bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_, (VP8LBackwardRefs*)&enc->refs_[0], transform_width, transform_height, - quality, low_effort); + quality, low_effort, enc->pic_, percent_range - percent_range / 2, + percent); } -static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc, - int width, int height, - int quality, int low_effort, - VP8LBitWriter* const bw) { +static int ApplyCrossColorFilter(const VP8LEncoder* const enc, int width, + int height, int quality, int low_effort, + VP8LBitWriter* const bw, int percent_range, + int* const percent) { const int ccolor_transform_bits = enc->transform_bits_; const int transform_width = VP8LSubSampleSize(width, ccolor_transform_bits); const int transform_height = VP8LSubSampleSize(height, ccolor_transform_bits); - VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality, - enc->argb_, enc->transform_data_); + if (!VP8LColorSpaceTransform(width, height, ccolor_transform_bits, quality, + enc->argb_, enc->transform_data_, enc->pic_, + percent_range / 2, percent)) { + return 0; + } VP8LPutBits(bw, TRANSFORM_PRESENT, 1); VP8LPutBits(bw, CROSS_COLOR_TRANSFORM, 2); assert(ccolor_transform_bits >= 2); @@ -1348,23 +1406,21 @@ static WebPEncodingError ApplyCrossColorFilter(const VP8LEncoder* const enc, return EncodeImageNoHuffman( bw, enc->transform_data_, (VP8LHashChain*)&enc->hash_chain_, (VP8LBackwardRefs*)&enc->refs_[0], transform_width, transform_height, - quality, low_effort); + quality, low_effort, enc->pic_, percent_range - percent_range / 2, + percent); } // ----------------------------------------------------------------------------- -static WebPEncodingError WriteRiffHeader(const WebPPicture* const pic, - size_t riff_size, size_t vp8l_size) { +static int WriteRiffHeader(const WebPPicture* const pic, size_t riff_size, + size_t vp8l_size) { uint8_t riff[RIFF_HEADER_SIZE + CHUNK_HEADER_SIZE + VP8L_SIGNATURE_SIZE] = { 'R', 'I', 'F', 'F', 0, 0, 0, 0, 'W', 'E', 'B', 'P', 'V', 'P', '8', 'L', 0, 0, 0, 0, VP8L_MAGIC_BYTE, }; PutLE32(riff + TAG_SIZE, (uint32_t)riff_size); PutLE32(riff + RIFF_HEADER_SIZE + TAG_SIZE, (uint32_t)vp8l_size); - if (!pic->writer(riff, sizeof(riff), pic)) { - return VP8_ENC_ERROR_BAD_WRITE; - } - return VP8_ENC_OK; + return pic->writer(riff, sizeof(riff), pic); } static int WriteImageSize(const WebPPicture* const pic, @@ -1384,36 +1440,29 @@ static int WriteRealAlphaAndVersion(VP8LBitWriter* const bw, int has_alpha) { return !bw->error_; } -static WebPEncodingError WriteImage(const WebPPicture* const pic, - VP8LBitWriter* const bw, - size_t* const coded_size) { - WebPEncodingError err = VP8_ENC_OK; +static int WriteImage(const WebPPicture* const pic, VP8LBitWriter* const bw, + size_t* const coded_size) { const uint8_t* const webpll_data = VP8LBitWriterFinish(bw); const size_t webpll_size = VP8LBitWriterNumBytes(bw); const size_t vp8l_size = VP8L_SIGNATURE_SIZE + webpll_size; const size_t pad = vp8l_size & 1; const size_t riff_size = TAG_SIZE + CHUNK_HEADER_SIZE + vp8l_size + pad; - err = WriteRiffHeader(pic, riff_size, vp8l_size); - if (err != VP8_ENC_OK) goto Error; - - if (!pic->writer(webpll_data, webpll_size, pic)) { - err = VP8_ENC_ERROR_BAD_WRITE; - goto Error; + if (!WriteRiffHeader(pic, riff_size, vp8l_size) || + !pic->writer(webpll_data, webpll_size, pic)) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); + return 0; } if (pad) { const uint8_t pad_byte[1] = { 0 }; if (!pic->writer(pad_byte, 1, pic)) { - err = VP8_ENC_ERROR_BAD_WRITE; - goto Error; + WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_WRITE); + return 0; } } *coded_size = CHUNK_HEADER_SIZE + riff_size; - return VP8_ENC_OK; - - Error: - return err; + return 1; } // ----------------------------------------------------------------------------- @@ -1429,18 +1478,16 @@ static void ClearTransformBuffer(VP8LEncoder* const enc) { // Flags influencing the memory allocated: // enc->transform_bits_ // enc->use_predict_, enc->use_cross_color_ -static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc, - int width, int height) { - WebPEncodingError err = VP8_ENC_OK; +static int AllocateTransformBuffer(VP8LEncoder* const enc, int width, + int height) { const uint64_t image_size = width * height; // VP8LResidualImage needs room for 2 scanlines of uint32 pixels with an extra // pixel in each, plus 2 regular scanlines of bytes. // TODO(skal): Clean up by using arithmetic in bytes instead of words. const uint64_t argb_scratch_size = - enc->use_predict_ - ? (width + 1) * 2 + - (width * 2 + sizeof(uint32_t) - 1) / sizeof(uint32_t) - : 0; + enc->use_predict_ ? (width + 1) * 2 + (width * 2 + sizeof(uint32_t) - 1) / + sizeof(uint32_t) + : 0; const uint64_t transform_data_size = (enc->use_predict_ || enc->use_cross_color_) ? VP8LSubSampleSize(width, enc->transform_bits_) * @@ -1448,17 +1495,16 @@ static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc, : 0; const uint64_t max_alignment_in_words = (WEBP_ALIGN_CST + sizeof(uint32_t) - 1) / sizeof(uint32_t); - const uint64_t mem_size = - image_size + max_alignment_in_words + - argb_scratch_size + max_alignment_in_words + - transform_data_size; + const uint64_t mem_size = image_size + max_alignment_in_words + + argb_scratch_size + max_alignment_in_words + + transform_data_size; uint32_t* mem = enc->transform_mem_; if (mem == NULL || mem_size > enc->transform_mem_size_) { ClearTransformBuffer(enc); mem = (uint32_t*)WebPSafeMalloc(mem_size, sizeof(*mem)); if (mem == NULL) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; - goto Error; + WebPEncodingSetError(enc->pic_, VP8_ENC_ERROR_OUT_OF_MEMORY); + return 0; } enc->transform_mem_ = mem; enc->transform_mem_size_ = (size_t)mem_size; @@ -1471,19 +1517,16 @@ static WebPEncodingError AllocateTransformBuffer(VP8LEncoder* const enc, enc->transform_data_ = mem; enc->current_width_ = width; - Error: - return err; + return 1; } -static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) { - WebPEncodingError err = VP8_ENC_OK; +static int MakeInputImageCopy(VP8LEncoder* const enc) { const WebPPicture* const picture = enc->pic_; const int width = picture->width; const int height = picture->height; - err = AllocateTransformBuffer(enc, width, height); - if (err != VP8_ENC_OK) return err; - if (enc->argb_content_ == kEncoderARGB) return VP8_ENC_OK; + if (!AllocateTransformBuffer(enc, width, height)) return 0; + if (enc->argb_content_ == kEncoderARGB) return 1; { uint32_t* dst = enc->argb_; @@ -1497,7 +1540,7 @@ static WebPEncodingError MakeInputImageCopy(VP8LEncoder* const enc) { } enc->argb_content_ = kEncoderARGB; assert(enc->current_width_ == width); - return VP8_ENC_OK; + return 1; } // ----------------------------------------------------------------------------- @@ -1559,16 +1602,19 @@ static WEBP_INLINE uint32_t ApplyPaletteHash2(uint32_t color) { // using 'row' as a temporary buffer of size 'width'. // We assume that all src[] values have a corresponding entry in the palette. // Note: src[] can be the same as dst[] -static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride, - uint32_t* dst, uint32_t dst_stride, - const uint32_t* palette, int palette_size, - int width, int height, int xbits) { +static int ApplyPalette(const uint32_t* src, uint32_t src_stride, uint32_t* dst, + uint32_t dst_stride, const uint32_t* palette, + int palette_size, int width, int height, int xbits, + const WebPPicture* const pic) { // TODO(skal): this tmp buffer is not needed if VP8LBundleColorMap() can be // made to work in-place. uint8_t* const tmp_row = (uint8_t*)WebPSafeMalloc(width, sizeof(*tmp_row)); int x, y; - if (tmp_row == NULL) return VP8_ENC_ERROR_OUT_OF_MEMORY; + if (tmp_row == NULL) { + WebPEncodingSetError(pic, VP8_ENC_ERROR_OUT_OF_MEMORY); + return 0; + } if (palette_size < APPLY_PALETTE_GREEDY_MAX) { APPLY_PALETTE_FOR(SearchColorGreedy(palette, palette_size, pix)); @@ -1613,7 +1659,7 @@ static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride, } } WebPSafeFree(tmp_row); - return VP8_ENC_OK; + return 1; } #undef APPLY_PALETTE_FOR #undef PALETTE_INV_SIZE_BITS @@ -1621,9 +1667,7 @@ static WebPEncodingError ApplyPalette(const uint32_t* src, uint32_t src_stride, #undef APPLY_PALETTE_GREEDY_MAX // Note: Expects "enc->palette_" to be set properly. -static WebPEncodingError MapImageFromPalette(VP8LEncoder* const enc, - int in_place) { - WebPEncodingError err = VP8_ENC_OK; +static int MapImageFromPalette(VP8LEncoder* const enc, int in_place) { const WebPPicture* const pic = enc->pic_; const int width = pic->width; const int height = pic->height; @@ -1641,19 +1685,22 @@ static WebPEncodingError MapImageFromPalette(VP8LEncoder* const enc, xbits = (palette_size <= 16) ? 1 : 0; } - err = AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height); - if (err != VP8_ENC_OK) return err; - - err = ApplyPalette(src, src_stride, + if (!AllocateTransformBuffer(enc, VP8LSubSampleSize(width, xbits), height)) { + return 0; + } + if (!ApplyPalette(src, src_stride, enc->argb_, enc->current_width_, - palette, palette_size, width, height, xbits); + palette, palette_size, width, height, xbits, pic)) { + return 0; + } enc->argb_content_ = kEncoderPalette; - return err; + return 1; } // Save palette_[] to bitstream. static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, int low_effort, - VP8LEncoder* const enc) { + VP8LEncoder* const enc, + int percent_range, int* const percent) { int i; uint32_t tmp_palette[MAX_PALETTE_SIZE]; const int palette_size = enc->palette_size_; @@ -1668,7 +1715,7 @@ static WebPEncodingError EncodePalette(VP8LBitWriter* const bw, int low_effort, tmp_palette[0] = palette[0]; return EncodeImageNoHuffman(bw, tmp_palette, &enc->hash_chain_, &enc->refs_[0], palette_size, 1, /*quality=*/20, - low_effort); + low_effort, enc->pic_, percent_range, percent); } // ----------------------------------------------------------------------------- @@ -1712,7 +1759,6 @@ typedef struct { CrunchConfig crunch_configs_[CRUNCH_CONFIGS_MAX]; int num_crunch_configs_; int red_and_blue_always_zero_; - WebPEncodingError err_; WebPAuxStats* stats_; } StreamEncodeContext; @@ -1729,7 +1775,6 @@ static int EncodeStreamHook(void* input, void* data2) { #if !defined(WEBP_DISABLE_STATS) WebPAuxStats* const stats = params->stats_; #endif - WebPEncodingError err = VP8_ENC_OK; const int quality = (int)config->quality; const int low_effort = (config->method == 0); #if (WEBP_NEAR_LOSSLESS == 1) @@ -1737,6 +1782,7 @@ static int EncodeStreamHook(void* input, void* data2) { #endif const int height = picture->height; const size_t byte_position = VP8LBitWriterNumBytes(bw); + int percent = 2; // for WebPProgressHook #if (WEBP_NEAR_LOSSLESS == 1) int use_near_lossless = 0; #endif @@ -1750,12 +1796,13 @@ static int EncodeStreamHook(void* input, void* data2) { if (!VP8LBitWriterInit(&bw_best, 0) || (num_crunch_configs > 1 && !VP8LBitWriterClone(bw, &bw_best))) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } for (idx = 0; idx < num_crunch_configs; ++idx) { const int entropy_idx = crunch_configs[idx].entropy_idx_; + int remaining_percent = 97 / num_crunch_configs, percent_range; enc->use_palette_ = (entropy_idx == kPalette) || (entropy_idx == kPaletteAndSpatial); enc->use_subtract_green_ = @@ -1779,11 +1826,10 @@ static int EncodeStreamHook(void* input, void* data2) { use_near_lossless = (config->near_lossless < 100) && !enc->use_palette_ && !enc->use_predict_; if (use_near_lossless) { - err = AllocateTransformBuffer(enc, width, height); - if (err != VP8_ENC_OK) goto Error; + if (!AllocateTransformBuffer(enc, width, height)) goto Error; if ((enc->argb_content_ != kEncoderNearLossless) && !VP8ApplyNearLossless(picture, config->near_lossless, enc->argb_)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } enc->argb_content_ = kEncoderNearLossless; @@ -1805,14 +1851,17 @@ static int EncodeStreamHook(void* input, void* data2) { enc->palette_); } else { assert(crunch_configs[idx].palette_sorting_type_ == kModifiedZeng); - err = PaletteSortModifiedZeng(enc->pic_, enc->palette_sorted_, - enc->palette_size_, enc->palette_); - if (err != VP8_ENC_OK) goto Error; + if (!PaletteSortModifiedZeng(enc->pic_, enc->palette_sorted_, + enc->palette_size_, enc->palette_)) { + goto Error; + } } - err = EncodePalette(bw, low_effort, enc); - if (err != VP8_ENC_OK) goto Error; - err = MapImageFromPalette(enc, use_delta_palette); - if (err != VP8_ENC_OK) goto Error; + percent_range = remaining_percent / 4; + if (!EncodePalette(bw, low_effort, enc, percent_range, &percent)) { + goto Error; + } + remaining_percent -= percent_range; + if (!MapImageFromPalette(enc, use_delta_palette)) goto Error; // If using a color cache, do not have it bigger than the number of // colors. if (use_cache && enc->palette_size_ < (1 << MAX_COLOR_CACHE_BITS)) { @@ -1823,8 +1872,7 @@ static int EncodeStreamHook(void* input, void* data2) { // In case image is not packed. if (enc->argb_content_ != kEncoderNearLossless && enc->argb_content_ != kEncoderPalette) { - err = MakeInputImageCopy(enc); - if (err != VP8_ENC_OK) goto Error; + if (!MakeInputImageCopy(enc)) goto Error; } // ----------------------------------------------------------------------- @@ -1835,15 +1883,22 @@ static int EncodeStreamHook(void* input, void* data2) { } if (enc->use_predict_) { - err = ApplyPredictFilter(enc, enc->current_width_, height, quality, - low_effort, enc->use_subtract_green_, bw); - if (err != VP8_ENC_OK) goto Error; + percent_range = remaining_percent / 3; + if (!ApplyPredictFilter(enc, enc->current_width_, height, quality, + low_effort, enc->use_subtract_green_, bw, + percent_range, &percent)) { + goto Error; + } + remaining_percent -= percent_range; } if (enc->use_cross_color_) { - err = ApplyCrossColorFilter(enc, enc->current_width_, height, quality, - low_effort, bw); - if (err != VP8_ENC_OK) goto Error; + percent_range = remaining_percent / 2; + if (!ApplyCrossColorFilter(enc, enc->current_width_, height, quality, + low_effort, bw, percent_range, &percent)) { + goto Error; + } + remaining_percent -= percent_range; } } @@ -1851,12 +1906,13 @@ static int EncodeStreamHook(void* input, void* data2) { // ------------------------------------------------------------------------- // Encode and write the transformed image. - err = EncodeImageInternal(bw, enc->argb_, &enc->hash_chain_, enc->refs_, - enc->current_width_, height, quality, low_effort, - use_cache, &crunch_configs[idx], - &enc->cache_bits_, enc->histo_bits_, - byte_position, &hdr_size, &data_size); - if (err != VP8_ENC_OK) goto Error; + if (!EncodeImageInternal( + bw, enc->argb_, &enc->hash_chain_, enc->refs_, enc->current_width_, + height, quality, low_effort, use_cache, &crunch_configs[idx], + &enc->cache_bits_, enc->histo_bits_, byte_position, &hdr_size, + &data_size, picture, remaining_percent, &percent)) { + goto Error; + } // If we are better than what we already have. if (VP8LBitWriterNumBytes(bw) < best_size) { @@ -1886,18 +1942,15 @@ static int EncodeStreamHook(void* input, void* data2) { } VP8LBitWriterSwap(&bw_best, bw); -Error: + Error: VP8LBitWriterWipeOut(&bw_best); - params->err_ = err; // The hook should return false in case of error. - return (err == VP8_ENC_OK); + return (params->picture_->error_code == VP8_ENC_OK); } -WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, - const WebPPicture* const picture, - VP8LBitWriter* const bw_main, - int use_cache) { - WebPEncodingError err = VP8_ENC_OK; +int VP8LEncodeStream(const WebPConfig* const config, + const WebPPicture* const picture, + VP8LBitWriter* const bw_main, int use_cache) { VP8LEncoder* const enc_main = VP8LEncoderNew(config, picture); VP8LEncoder* enc_side = NULL; CrunchConfig crunch_configs[CRUNCH_CONFIGS_MAX]; @@ -1909,15 +1962,24 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, // The main thread uses picture->stats, the side thread uses stats_side. WebPAuxStats stats_side; VP8LBitWriter bw_side; + WebPPicture picture_side; const WebPWorkerInterface* const worker_interface = WebPGetWorkerInterface(); int ok_main; + if (enc_main == NULL || !VP8LBitWriterInit(&bw_side, 0)) { + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); + VP8LEncoderDelete(enc_main); + return 0; + } + + // Avoid "garbage value" error from Clang's static analysis tool. + WebPPictureInit(&picture_side); + // Analyze image (entropy, num_palettes etc) - if (enc_main == NULL || - !EncoderAnalyze(enc_main, crunch_configs, &num_crunch_configs_main, + if (!EncoderAnalyze(enc_main, crunch_configs, &num_crunch_configs_main, &red_and_blue_always_zero) || - !EncoderInit(enc_main) || !VP8LBitWriterInit(&bw_side, 0)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + !EncoderInit(enc_main)) { + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } @@ -1946,25 +2008,32 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, StreamEncodeContext* const param = (idx == 0) ? ¶ms_main : ¶ms_side; param->config_ = config; - param->picture_ = picture; param->use_cache_ = use_cache; param->red_and_blue_always_zero_ = red_and_blue_always_zero; if (idx == 0) { + param->picture_ = picture; param->stats_ = picture->stats; param->bw_ = bw_main; param->enc_ = enc_main; } else { + // Create a side picture (error_code is not thread-safe). + if (!WebPPictureView(picture, /*left=*/0, /*top=*/0, picture->width, + picture->height, &picture_side)) { + assert(0); + } + picture_side.progress_hook = NULL; // Progress hook is not thread-safe. + param->picture_ = &picture_side; // No need to free a view afterwards. param->stats_ = (picture->stats == NULL) ? NULL : &stats_side; // Create a side bit writer. if (!VP8LBitWriterClone(bw_main, &bw_side)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } param->bw_ = &bw_side; // Create a side encoder. - enc_side = VP8LEncoderNew(config, picture); + enc_side = VP8LEncoderNew(config, &picture_side); if (enc_side == NULL || !EncoderInit(enc_side)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } // Copy the values that were computed for the main encoder. @@ -1988,7 +2057,7 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, // Start the second thread if needed. if (num_crunch_configs_side != 0) { if (!worker_interface->Reset(&worker_side)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } #if !defined(WEBP_DISABLE_STATS) @@ -1998,8 +2067,6 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, memcpy(&stats_side, picture->stats, sizeof(stats_side)); } #endif - // This line is only useful to remove a Clang static analyzer warning. - params_side.err_ = VP8_ENC_OK; worker_interface->Launch(&worker_side); } // Execute the main thread. @@ -2011,7 +2078,10 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, const int ok_side = worker_interface->Sync(&worker_side); worker_interface->End(&worker_side); if (!ok_main || !ok_side) { - err = ok_main ? params_side.err_ : params_main.err_; + if (picture->error_code == VP8_ENC_OK) { + assert(picture_side.error_code != VP8_ENC_OK); + WebPEncodingSetError(picture, picture_side.error_code); + } goto Error; } if (VP8LBitWriterNumBytes(&bw_side) < VP8LBitWriterNumBytes(bw_main)) { @@ -2022,18 +2092,13 @@ WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, } #endif } - } else { - if (!ok_main) { - err = params_main.err_; - goto Error; - } } -Error: + Error: VP8LBitWriterWipeOut(&bw_side); VP8LEncoderDelete(enc_main); VP8LEncoderDelete(enc_side); - return err; + return (picture->error_code == VP8_ENC_OK); } #undef CRUNCH_CONFIGS_MAX @@ -2046,14 +2111,12 @@ int VP8LEncodeImage(const WebPConfig* const config, size_t coded_size; int percent = 0; int initial_size; - WebPEncodingError err = VP8_ENC_OK; VP8LBitWriter bw; if (picture == NULL) return 0; if (config == NULL || picture->argb == NULL) { - err = VP8_ENC_ERROR_NULL_PARAMETER; - WebPEncodingSetError(picture, err); + WebPEncodingSetError(picture, VP8_ENC_ERROR_NULL_PARAMETER); return 0; } @@ -2064,13 +2127,13 @@ int VP8LEncodeImage(const WebPConfig* const config, initial_size = (config->image_hint == WEBP_HINT_GRAPH) ? width * height : width * height * 2; if (!VP8LBitWriterInit(&bw, initial_size)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } if (!WebPReportProgress(picture, 1, &percent)) { UserAbort: - err = VP8_ENC_ERROR_USER_ABORT; + WebPEncodingSetError(picture, VP8_ENC_ERROR_USER_ABORT); goto Error; } // Reset stats (for pure lossless coding) @@ -2086,28 +2149,26 @@ int VP8LEncodeImage(const WebPConfig* const config, // Write image size. if (!WriteImageSize(picture, &bw)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } has_alpha = WebPPictureHasTransparency(picture); // Write the non-trivial Alpha flag and lossless version. if (!WriteRealAlphaAndVersion(&bw, has_alpha)) { - err = VP8_ENC_ERROR_OUT_OF_MEMORY; + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); goto Error; } - if (!WebPReportProgress(picture, 5, &percent)) goto UserAbort; + if (!WebPReportProgress(picture, 2, &percent)) goto UserAbort; // Encode main image stream. - err = VP8LEncodeStream(config, picture, &bw, 1 /*use_cache*/); - if (err != VP8_ENC_OK) goto Error; + if (!VP8LEncodeStream(config, picture, &bw, 1 /*use_cache*/)) goto Error; - if (!WebPReportProgress(picture, 90, &percent)) goto UserAbort; + if (!WebPReportProgress(picture, 99, &percent)) goto UserAbort; // Finish the RIFF chunk. - err = WriteImage(picture, &bw, &coded_size); - if (err != VP8_ENC_OK) goto Error; + if (!WriteImage(picture, &bw, &coded_size)) goto Error; if (!WebPReportProgress(picture, 100, &percent)) goto UserAbort; @@ -2126,13 +2187,11 @@ int VP8LEncodeImage(const WebPConfig* const config, } Error: - if (bw.error_) err = VP8_ENC_ERROR_OUT_OF_MEMORY; - VP8LBitWriterWipeOut(&bw); - if (err != VP8_ENC_OK) { - WebPEncodingSetError(picture, err); - return 0; + if (bw.error_) { + WebPEncodingSetError(picture, VP8_ENC_ERROR_OUT_OF_MEMORY); } - return 1; + VP8LBitWriterWipeOut(&bw); + return (picture->error_code == VP8_ENC_OK); } //------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/src/enc/vp8li_enc.h b/thirdparty/libwebp/src/enc/vp8li_enc.h index 00de48946c7..3d35e1612de 100644 --- a/thirdparty/libwebp/src/enc/vp8li_enc.h +++ b/thirdparty/libwebp/src/enc/vp8li_enc.h @@ -89,9 +89,10 @@ int VP8LEncodeImage(const WebPConfig* const config, // Encodes the main image stream using the supplied bit writer. // If 'use_cache' is false, disables the use of color cache. -WebPEncodingError VP8LEncodeStream(const WebPConfig* const config, - const WebPPicture* const picture, - VP8LBitWriter* const bw, int use_cache); +// Returns false in case of error (stored in picture->error_code). +int VP8LEncodeStream(const WebPConfig* const config, + const WebPPicture* const picture, VP8LBitWriter* const bw, + int use_cache); #if (WEBP_NEAR_LOSSLESS == 1) // in near_lossless.c @@ -103,13 +104,18 @@ int VP8ApplyNearLossless(const WebPPicture* const picture, int quality, //------------------------------------------------------------------------------ // Image transforms in predictor.c. -void VP8LResidualImage(int width, int height, int bits, int low_effort, - uint32_t* const argb, uint32_t* const argb_scratch, - uint32_t* const image, int near_lossless, int exact, - int used_subtract_green); +// pic and percent are for progress. +// Returns false in case of error (stored in pic->error_code). +int VP8LResidualImage(int width, int height, int bits, int low_effort, + uint32_t* const argb, uint32_t* const argb_scratch, + uint32_t* const image, int near_lossless, int exact, + int used_subtract_green, const WebPPicture* const pic, + int percent_range, int* const percent); -void VP8LColorSpaceTransform(int width, int height, int bits, int quality, - uint32_t* const argb, uint32_t* image); +int VP8LColorSpaceTransform(int width, int height, int bits, int quality, + uint32_t* const argb, uint32_t* image, + const WebPPicture* const pic, int percent_range, + int* const percent); //------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/src/enc/webp_enc.c b/thirdparty/libwebp/src/enc/webp_enc.c index ce2db2e94bc..9620e050706 100644 --- a/thirdparty/libwebp/src/enc/webp_enc.c +++ b/thirdparty/libwebp/src/enc/webp_enc.c @@ -336,9 +336,7 @@ int WebPEncode(const WebPConfig* config, WebPPicture* pic) { if (!WebPValidateConfig(config)) { return WebPEncodingSetError(pic, VP8_ENC_ERROR_INVALID_CONFIGURATION); } - if (pic->width <= 0 || pic->height <= 0) { - return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION); - } + if (!WebPValidatePicture(pic)) return 0; if (pic->width > WEBP_MAX_DIMENSION || pic->height > WEBP_MAX_DIMENSION) { return WebPEncodingSetError(pic, VP8_ENC_ERROR_BAD_DIMENSION); } diff --git a/thirdparty/libwebp/src/mux/muxedit.c b/thirdparty/libwebp/src/mux/muxedit.c index 02c3edecd74..63e71a0aba3 100644 --- a/thirdparty/libwebp/src/mux/muxedit.c +++ b/thirdparty/libwebp/src/mux/muxedit.c @@ -70,6 +70,7 @@ void WebPMuxDelete(WebPMux* mux) { err = ChunkAssignData(&chunk, data, copy_data, tag); \ if (err == WEBP_MUX_OK) { \ err = ChunkSetHead(&chunk, (LIST)); \ + if (err != WEBP_MUX_OK) ChunkRelease(&chunk); \ } \ return err; \ } diff --git a/thirdparty/libwebp/src/mux/muxi.h b/thirdparty/libwebp/src/mux/muxi.h index d9bf9b37707..0f4af1784d7 100644 --- a/thirdparty/libwebp/src/mux/muxi.h +++ b/thirdparty/libwebp/src/mux/muxi.h @@ -29,7 +29,7 @@ extern "C" { #define MUX_MAJ_VERSION 1 #define MUX_MIN_VERSION 2 -#define MUX_REV_VERSION 2 +#define MUX_REV_VERSION 4 // Chunk object. typedef struct WebPChunk WebPChunk; diff --git a/thirdparty/libwebp/src/mux/muxinternal.c b/thirdparty/libwebp/src/mux/muxinternal.c index b9ee6717d3a..75b6b416b99 100644 --- a/thirdparty/libwebp/src/mux/muxinternal.c +++ b/thirdparty/libwebp/src/mux/muxinternal.c @@ -155,17 +155,18 @@ WebPMuxError ChunkSetHead(WebPChunk* const chunk, WebPMuxError ChunkAppend(WebPChunk* const chunk, WebPChunk*** const chunk_list) { + WebPMuxError err; assert(chunk_list != NULL && *chunk_list != NULL); if (**chunk_list == NULL) { - ChunkSetHead(chunk, *chunk_list); + err = ChunkSetHead(chunk, *chunk_list); } else { WebPChunk* last_chunk = **chunk_list; while (last_chunk->next_ != NULL) last_chunk = last_chunk->next_; - ChunkSetHead(chunk, &last_chunk->next_); - *chunk_list = &last_chunk->next_; + err = ChunkSetHead(chunk, &last_chunk->next_); + if (err == WEBP_MUX_OK) *chunk_list = &last_chunk->next_; } - return WEBP_MUX_OK; + return err; } //------------------------------------------------------------------------------ diff --git a/thirdparty/libwebp/src/webp/encode.h b/thirdparty/libwebp/src/webp/encode.h index b4c599df876..56b68e2f10e 100644 --- a/thirdparty/libwebp/src/webp/encode.h +++ b/thirdparty/libwebp/src/webp/encode.h @@ -441,7 +441,7 @@ WEBP_EXTERN int WebPPictureCrop(WebPPicture* picture, // the original dimension will be lost). Picture 'dst' need not be initialized // with WebPPictureInit() if it is different from 'src', since its content will // be overwritten. -// Returns false in case of memory allocation error or invalid parameters. +// Returns false in case of invalid parameters. WEBP_EXTERN int WebPPictureView(const WebPPicture* src, int left, int top, int width, int height, WebPPicture* dst); @@ -455,7 +455,7 @@ WEBP_EXTERN int WebPPictureIsView(const WebPPicture* picture); // dimension will be calculated preserving the aspect ratio. // No gamma correction is applied. // Returns false in case of error (invalid parameter or insufficient memory). -WEBP_EXTERN int WebPPictureRescale(WebPPicture* pic, int width, int height); +WEBP_EXTERN int WebPPictureRescale(WebPPicture* picture, int width, int height); // Colorspace conversion function to import RGB samples. // Previous buffer will be free'd, if any. @@ -526,7 +526,7 @@ WEBP_EXTERN int WebPPictureHasTransparency(const WebPPicture* picture); // Remove the transparency information (if present) by blending the color with // the background color 'background_rgb' (specified as 24bit RGB triplet). // After this call, all alpha values are reset to 0xff. -WEBP_EXTERN void WebPBlendAlpha(WebPPicture* pic, uint32_t background_rgb); +WEBP_EXTERN void WebPBlendAlpha(WebPPicture* picture, uint32_t background_rgb); //------------------------------------------------------------------------------ // Main call From bf33de769a0615fbb8774d12c15844eb539a3490 Mon Sep 17 00:00:00 2001 From: Danil Alexeev Date: Tue, 28 Jun 2022 18:13:00 +0300 Subject: [PATCH 15/17] Add boot splash display time setting Implements #8867. (cherry picked from commit dad9683d11ca174cf50d0039fb0f05fb7439984f) --- doc/classes/ProjectSettings.xml | 3 +++ main/main.cpp | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml index 32cf034fd0b..1ddfb9e62ec 100644 --- a/doc/classes/ProjectSettings.xml +++ b/doc/classes/ProjectSettings.xml @@ -182,6 +182,9 @@ Path to an image used as the boot splash. If left empty, the default Godot Engine splash will be displayed instead. [b]Note:[/b] Only effective if [member application/boot_splash/show_image] is [code]true[/code]. + + Minimum boot splash display time (in milliseconds). It is not recommended to set too high values for this setting. + If [code]true[/code], displays the image specified in [member application/boot_splash/image] when the engine starts. If [code]false[/code], only displays the plain color specified in [member application/boot_splash/bg_color]. diff --git a/main/main.cpp b/main/main.cpp index c67e8b0baf5..b73d3526e65 100644 --- a/main/main.cpp +++ b/main/main.cpp @@ -1700,6 +1700,13 @@ bool Main::start() { } } + uint64_t minimum_time_msec = GLOBAL_DEF("application/boot_splash/minimum_display_time", 0); + ProjectSettings::get_singleton()->set_custom_property_info("application/boot_splash/minimum_display_time", + PropertyInfo(Variant::INT, + "application/boot_splash/minimum_display_time", + PROPERTY_HINT_RANGE, + "0,100,1,or_greater")); // No negative numbers. + #ifdef TOOLS_ENABLED if (doc_tool_path != "") { Engine::get_singleton()->set_editor_hint(true); // Needed to instance editor-only classes for their default values @@ -2182,6 +2189,14 @@ bool Main::start() { OS::get_singleton()->set_main_loop(main_loop); + if (minimum_time_msec) { + uint64_t minimum_time = 1000 * minimum_time_msec; + uint64_t elapsed_time = OS::get_singleton()->get_ticks_usec(); + if (elapsed_time < minimum_time) { + OS::get_singleton()->delay_usec(minimum_time - elapsed_time); + } + } + return true; } From fd91b2dbb9c4039a728da69e78d35afe2106b91e Mon Sep 17 00:00:00 2001 From: Jummit Date: Sun, 3 Jul 2022 17:22:06 +0200 Subject: [PATCH 16/17] Keep Perspective menu open on selection Makes the Perspective menu consistent with the View menu in the 3D viewport. This allows for quicker inspection of the scene, and makes missclicks more forgiving. (cherry picked from commit e3ab344af9839bbe1ffa690093acd9ea7ffde554) --- editor/plugins/spatial_editor_plugin.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/editor/plugins/spatial_editor_plugin.cpp b/editor/plugins/spatial_editor_plugin.cpp index 15743e976ff..ccd9fc76ed7 100644 --- a/editor/plugins/spatial_editor_plugin.cpp +++ b/editor/plugins/spatial_editor_plugin.cpp @@ -4076,6 +4076,7 @@ SpatialEditorViewport::SpatialEditorViewport(SpatialEditor *p_spatial_editor, Ed view_menu->set_flat(false); vbox->add_child(view_menu); view_menu->set_h_size_flags(0); + view_menu->get_popup()->set_hide_on_checkable_item_selection(false); view_menu->get_popup()->add_shortcut(ED_GET_SHORTCUT("spatial_editor/top_view"), VIEW_TOP); view_menu->get_popup()->add_shortcut(ED_GET_SHORTCUT("spatial_editor/bottom_view"), VIEW_BOTTOM); From 2c92594fa693ef3c5c14ee7379d9cb3ebf54f7f8 Mon Sep 17 00:00:00 2001 From: reduz Date: Wed, 22 Jun 2022 19:03:43 +0200 Subject: [PATCH 17/17] Simplify Subresource Saving Redo edited subresource (and resource) saving in a much more simplified way. I think this should work (unless I am missing something) and be faster than what is there. It should also supersede #55885. I am not 100% entirely convinced that this approach works, but I think it should so please test. (cherry picked from commit 9eb5f2a0d79fb761235e77d369ee2f38fceb094a) --- editor/editor_node.cpp | 65 ++++++++++++++++-------------------------- 1 file changed, 25 insertions(+), 40 deletions(-) diff --git a/editor/editor_node.cpp b/editor/editor_node.cpp index 9303d7d98ca..3d76ee8b415 100644 --- a/editor/editor_node.cpp +++ b/editor/editor_node.cpp @@ -1439,34 +1439,6 @@ bool EditorNode::_validate_scene_recursive(const String &p_filename, Node *p_nod return false; } -static bool _find_edited_resources(const Ref &p_resource, Set> &edited_resources) { - if (p_resource->is_edited()) { - edited_resources.insert(p_resource); - return true; - } - - List plist; - - p_resource->get_property_list(&plist); - - for (List::Element *E = plist.front(); E; E = E->next()) { - if (E->get().type == Variant::OBJECT && E->get().usage & PROPERTY_USAGE_STORAGE && !(E->get().usage & PROPERTY_USAGE_RESOURCE_NOT_PERSISTENT)) { - RES res = p_resource->get(E->get().name); - if (res.is_null()) { - continue; - } - if (res->get_path().is_resource_file()) { //not a subresource, continue - continue; - } - if (_find_edited_resources(res, edited_resources)) { - return true; - } - } - } - - return false; -} - int EditorNode::_save_external_resources() { //save external resources and its subresources if any was modified @@ -1476,28 +1448,41 @@ int EditorNode::_save_external_resources() { } flg |= ResourceSaver::FLAG_REPLACE_SUBRESOURCE_PATHS; - Set> edited_subresources; + Set edited_resources; int saved = 0; List> cached; ResourceCache::get_cached_resources(&cached); for (List>::Element *E = cached.front(); E; E = E->next()) { Ref res = E->get(); - if (!res->get_path().is_resource_file()) { + if (!res->is_edited()) { continue; } - //not only check if this resourec is edited, check contained subresources too - if (_find_edited_resources(res, edited_subresources)) { - ResourceSaver::save(res->get_path(), res, flg); - saved++; + + String path = res->get_path(); + if (path.begins_with("res://")) { + int subres_pos = path.find("::"); + if (subres_pos == -1) { + // Actual resource. + edited_resources.insert(path); + } else { + edited_resources.insert(path.substr(0, subres_pos)); + } } + + res->set_edited(false); } - // clear later, because user may have put the same subresource in two different resources, - // which will be shared until the next reload - - for (Set>::Element *E = edited_subresources.front(); E; E = E->next()) { - Ref res = E->get(); - res->set_edited(false); + for (Set::Element *E = edited_resources.front(); E; E = E->next()) { + Ref res = Ref(ResourceCache::get(E->get())); + if (!res.is_valid()) { + continue; // Maybe it was erased in a thread, who knows. + } + Ref ps = res; + if (ps.is_valid()) { + continue; // Do not save PackedScenes, this will mess up the editor. + } + ResourceSaver::save(res->get_path(), res, flg); + saved++; } return saved;